framework,version,device,op_name,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,1,128,1,float16,fp8,0,72.35875447591145
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,1,128,1,float16,float16,0,73.12306722005208
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,1,128,1,fp8,fp8,0,43.02898661295573
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,2,128,1,fp8,fp8,0,42.76592508951823
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,2,128,1,float16,float16,0,71.23563130696614
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,2,128,1,float16,fp8,0,72.11245727539062
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,4,128,1,float16,float16,0,73.28969828287761
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,4,128,1,float16,fp8,0,70.05515543619792
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,1,128,1,float16,float16,0,32.48639424641927
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,4,128,1,fp8,fp8,0,42.99706013997396
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,8,128,1,fp8,fp8,0,43.35393269856771
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,8,128,1,float16,float16,0,73.12490336100261
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,64,8,128,1,float16,fp8,0,72.34548950195312
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,1,128,1,fp8,fp8,0,21.61479949951172
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,1,128,1,float16,fp8,0,32.3327891031901
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,64,128,1,fp8,fp8,0,21.590596516927082
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,64,128,1,float16,fp8,0,32.508288065592446
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,2,128,1,float16,fp8,0,33.3493398030599
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,2,128,1,float16,float16,0,33.15890757242838
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,64,128,1,float16,float16,0,32.4642333984375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,2,128,1,fp8,fp8,0,21.612772623697918
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,4,128,1,float16,float16,0,33.10906219482422
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,4,128,1,float16,fp8,0,32.52488454182943
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,4,128,1,fp8,fp8,0,21.895823160807293
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,1,128,1,float16,float16,0,16.44037373860677
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,8,128,1,float16,float16,0,33.11307271321615
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,8,128,1,float16,fp8,0,33.17347208658854
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,64,8,128,1,fp8,fp8,0,21.736928304036457
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,1,128,1,float16,fp8,0,16.927413940429688
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,64,128,1,float16,float16,0,16.425434112548828
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,1,128,1,fp8,fp8,0,10.81149927775065
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,64,128,1,fp8,fp8,0,11.006202697753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,2,128,1,float16,float16,0,16.771162668863933
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,2,128,1,float16,fp8,0,16.626283009847004
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,64,128,1,float16,fp8,0,16.56173324584961
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,2,128,1,fp8,fp8,0,10.988175710042318
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,4,128,1,float16,float16,0,17.056490580240887
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,4,128,1,float16,fp8,0,16.55529022216797
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,4,128,1,fp8,fp8,0,10.876448313395182
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,8,128,1,float16,float16,0,16.77167510986328
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,8,128,1,float16,fp8,0,16.699429829915363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,64,8,128,1,fp8,fp8,0,11.060624440511068
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,1,128,1,float16,float16,0,8.464309056599935
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,64,128,1,float16,float16,0,8.510677337646484
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,1,128,1,float16,fp8,0,8.40823491414388
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,64,128,1,fp8,fp8,0,5.763792037963867
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,1,128,1,fp8,fp8,0,5.435445149739583
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,64,128,1,float16,fp8,0,8.408613204956055
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,2,128,1,float16,fp8,0,8.538085301717123
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,2,128,1,float16,float16,0,8.598634719848633
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,2,128,1,fp8,fp8,0,5.568096160888672
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,4,128,1,float16,float16,0,8.574149449666342
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,4,128,1,float16,fp8,0,8.36129061381022
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,4,128,1,fp8,fp8,0,5.498821258544922
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,8,128,1,float16,float16,0,8.632293065388998
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,8,128,1,fp8,fp8,0,5.683151880900065
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,64,8,128,1,float16,fp8,0,8.466927846272787
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,1,128,1,float16,fp8,0,37.78607940673828
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,1,128,1,float16,float16,0,37.998046875
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,1,128,1,fp8,fp8,0,25.20765431722005
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,2,128,1,fp8,fp8,0,25.152793884277344
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,2,128,1,float16,float16,0,37.51702880859375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,2,128,1,float16,fp8,0,38.24817148844401
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,4,128,1,float16,float16,0,38.28027852376302
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,4,128,1,float16,fp8,0,37.9993642171224
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,4,128,1,fp8,fp8,0,24.809961954752605
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,8,128,1,float16,float16,0,38.074127197265625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,8,128,1,fp8,fp8,0,25.488746643066406
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,64,8,128,1,float16,fp8,0,37.92986551920573
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,1,128,1,float16,float16,0,19.10788853963216
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,64,128,1,fp8,fp8,0,12.979600270589193
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,64,128,1,float16,float16,0,18.721941630045574
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,1,128,1,float16,fp8,0,18.951744079589844
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,64,128,1,float16,fp8,0,18.98147710164388
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,1,128,1,fp8,fp8,0,12.550586700439453
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,2,128,1,float16,float16,0,19.22523244222005
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,2,128,1,float16,fp8,0,18.852976481119793
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,2,128,1,fp8,fp8,0,12.73422876993815
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,4,128,1,float16,float16,0,19.414778391520183
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,4,128,1,fp8,fp8,0,12.651658376057943
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,4,128,1,float16,fp8,0,18.77803675333659
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,8,128,1,float16,float16,0,19.4386723836263
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,1,128,1,float16,float16,0,9.737120310465494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,8,128,1,float16,fp8,0,19.04909896850586
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,1,128,1,float16,fp8,0,9.512453079223633
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,64,8,128,1,fp8,fp8,0,12.724597930908203
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,1,128,1,fp8,fp8,0,6.473717371622722
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,64,128,1,float16,float16,0,9.575983683268229
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,2,128,1,float16,float16,0,9.715189615885416
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,2,128,1,float16,fp8,0,9.65780258178711
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,64,128,1,fp8,fp8,0,6.490869522094727
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,64,128,1,float16,fp8,0,9.54629898071289
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,2,128,1,fp8,fp8,0,6.348672231038411
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,4,128,1,float16,float16,0,9.88812255859375
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,4,128,1,float16,fp8,0,9.819098790486654
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,4,128,1,fp8,fp8,0,6.248181025187175
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,1,128,1,float16,float16,0,4.982618649800618
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,8,128,1,float16,float16,0,9.702981313069662
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,8,128,1,fp8,fp8,0,6.441274642944336
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,64,8,128,1,float16,fp8,0,9.567536036173502
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,1,128,1,float16,fp8,0,4.677061398824056
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,64,128,1,float16,float16,0,4.7410933176676435
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,1,128,1,fp8,fp8,0,3.2962560653686523
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,64,128,1,float16,fp8,0,4.812549273173015
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,64,128,1,fp8,fp8,0,3.286714553833008
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,2,128,1,float16,float16,0,4.786981264750163
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,2,128,1,float16,fp8,0,4.7098134358723955
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,2,128,1,fp8,fp8,0,3.2719252904256186
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,4,128,1,float16,float16,0,4.9005279541015625
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,4,128,1,float16,fp8,0,4.707680066426595
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,4,128,1,fp8,fp8,0,3.3286027908325195
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,8,128,1,float16,float16,0,4.782378514607747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,8,128,1,float16,fp8,0,4.9073225657145185
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,64,8,128,1,fp8,fp8,0,3.261610666910807
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,1,128,1,float16,float16,0,27.150095621744793
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,1,128,1,float16,fp8,0,26.86389923095703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,1,128,1,fp8,fp8,0,17.836959838867188
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,2,128,1,fp8,fp8,0,17.660933176676433
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,2,128,1,float16,float16,0,27.106905619303387
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,2,128,1,float16,fp8,0,27.24017079671224
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,4,128,1,float16,float16,0,27.0592524210612
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,4,128,1,float16,fp8,0,26.741892496744793
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,1,128,1,float16,float16,0,13.547200520833334
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,4,128,1,fp8,fp8,0,18.170480092366535
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,8,128,1,float16,fp8,0,26.908345540364582
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,8,128,1,float16,float16,0,27.20355224609375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,64,8,128,1,fp8,fp8,0,17.816448211669922
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,1,128,1,float16,fp8,0,13.547247568766275
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,1,128,1,fp8,fp8,0,9.036357243855795
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,64,128,1,float16,fp8,0,13.767466227213541
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,64,128,1,fp8,fp8,0,9.319173177083334
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,64,128,1,float16,float16,0,13.533008575439453
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,2,128,1,float16,fp8,0,13.520314534505209
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,2,128,1,float16,float16,0,13.496437072753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,2,128,1,fp8,fp8,0,8.94873046875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,4,128,1,float16,float16,0,13.538874308268229
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,4,128,1,float16,fp8,0,13.601637522379557
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,4,128,1,fp8,fp8,0,9.09763209025065
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,8,128,1,float16,fp8,0,13.542218526204428
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,8,128,1,float16,float16,0,13.71185557047526
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,1,128,1,float16,float16,0,6.804821650187175
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,1,128,1,float16,fp8,0,6.882373174031575
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,64,8,128,1,fp8,fp8,0,9.080341339111328
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,64,128,1,float16,float16,0,6.636058807373047
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,1,128,1,fp8,fp8,0,4.554992039998372
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,2,128,1,float16,float16,0,6.856751759847005
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,64,128,1,float16,fp8,0,6.548245112101237
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,2,128,1,float16,fp8,0,6.771999994913737
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,2,128,1,fp8,fp8,0,4.5809173583984375
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,64,128,1,fp8,fp8,0,4.635973294576009
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,4,128,1,float16,float16,0,6.82864507039388
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,4,128,1,float16,fp8,0,6.797461191813151
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,4,128,1,fp8,fp8,0,4.457034746805827
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,8,128,1,float16,float16,0,6.740581512451172
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,8,128,1,fp8,fp8,0,4.585557301839192
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,64,8,128,1,float16,fp8,0,6.640485127766927
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,1,128,1,float16,float16,0,3.4083681106567383
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,1,128,1,float16,fp8,0,3.4374186197916665
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,1,128,1,fp8,fp8,0,2.3930506706237793
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,64,128,1,float16,float16,0,3.38154665629069
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,64,128,1,float16,fp8,0,3.3376213709513345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,64,128,1,fp8,fp8,0,2.3927413622538247
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,2,128,1,float16,float16,0,3.494842529296875
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,2,128,1,float16,fp8,0,3.3405332565307617
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,2,128,1,fp8,fp8,0,2.384431997934977
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,4,128,1,float16,float16,0,3.4293492635091147
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,4,128,1,float16,fp8,0,3.4021387100219727
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,4,128,1,fp8,fp8,0,2.3876852989196777
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,8,128,1,float16,float16,0,3.465653419494629
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,8,128,1,float16,fp8,0,3.4497706095377603
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,64,8,128,1,fp8,fp8,0,2.400400002797445
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,1,128,1,fp8,fp8,0,23.98076883951823
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,1,128,1,float16,float16,0,34.92839558919271
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,1,128,1,float16,fp8,0,35.34219106038412
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,2,128,1,float16,float16,0,35.49749247233073
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,2,128,1,float16,fp8,0,35.762672424316406
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,2,128,1,fp8,fp8,0,23.709370930989582
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,4,128,1,float16,fp8,0,35.64126332600912
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,4,128,1,float16,float16,0,35.682342529296875
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,1,128,1,float16,float16,0,17.83517328898112
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,4,128,1,fp8,fp8,0,24.083567301432293
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,1,128,1,float16,fp8,0,17.649935404459637
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,8,128,1,float16,fp8,0,35.45020294189453
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,8,128,1,fp8,fp8,0,24.209307352701824
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,64,8,128,1,float16,float16,0,35.27241007486979
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,64,128,1,float16,float16,0,17.96358362833659
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,64,128,1,fp8,fp8,0,12.399339040120443
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,64,128,1,float16,fp8,0,17.830154418945312
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,1,128,1,fp8,fp8,0,11.97494888305664
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,2,128,1,float16,float16,0,17.77728525797526
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,2,128,1,fp8,fp8,0,12.048826853434244
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,2,128,1,float16,fp8,0,17.663514455159504
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,4,128,1,float16,float16,0,17.694469451904297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,4,128,1,float16,fp8,0,18.035354614257812
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,4,128,1,fp8,fp8,0,12.100580851236979
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,8,128,1,float16,fp8,0,17.94265619913737
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,8,128,1,float16,float16,0,17.849674224853516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,1,128,1,float16,float16,0,8.968512217203775
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,1,128,1,float16,fp8,0,8.960981369018555
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,64,8,128,1,fp8,fp8,0,12.032394409179688
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,1,128,1,fp8,fp8,0,5.810698827107747
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,2,128,1,float16,float16,0,8.95306142171224
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,64,128,1,float16,float16,0,9.025413513183594
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,64,128,1,float16,fp8,0,8.852197647094727
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,2,128,1,fp8,fp8,0,5.873013178507487
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,2,128,1,float16,fp8,0,8.952447891235352
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,64,128,1,fp8,fp8,0,6.0891571044921875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,4,128,1,float16,float16,0,9.121423721313477
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,4,128,1,float16,fp8,0,9.036682764689127
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,4,128,1,fp8,fp8,0,5.795749028523763
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,8,128,1,float16,float16,0,8.960005442301432
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,8,128,1,float16,fp8,0,8.84498659769694
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,64,8,128,1,fp8,fp8,0,5.904314676920573
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,1,128,1,float16,float16,0,4.3451948165893555
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,1,128,1,float16,fp8,0,4.341957410176595
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,64,128,1,float16,float16,0,4.568746566772461
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,1,128,1,fp8,fp8,0,3.0282293955485025
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,64,128,1,float16,fp8,0,4.3468319574991865
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,2,128,1,float16,float16,0,4.349989255269368
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,2,128,1,float16,fp8,0,4.292223930358887
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,64,128,1,fp8,fp8,0,3.091024080912272
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,2,128,1,fp8,fp8,0,3.0154027938842773
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,4,128,1,float16,float16,0,4.426181475321452
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,4,128,1,float16,fp8,0,4.277722676595052
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,4,128,1,fp8,fp8,0,3.029120127360026
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,8,128,1,float16,float16,0,4.464960098266602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,8,128,1,fp8,fp8,0,3.042917251586914
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,64,8,128,1,float16,fp8,0,4.361818631490071
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,1,128,1,float16,float16,0,2.3044427235921225
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,1,128,1,float16,fp8,0,2.289584000905355
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,64,128,1,float16,fp8,0,2.28329070409139
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,64,128,1,float16,float16,0,2.2743199666341147
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,1,128,1,fp8,fp8,0,1.6401546796162922
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,64,128,1,fp8,fp8,0,1.6652480761210124
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,2,128,1,float16,float16,0,2.319749355316162
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,2,128,1,float16,fp8,0,2.275183995564779
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,2,128,1,fp8,fp8,0,1.644063949584961
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,4,128,1,float16,fp8,0,2.2855040232340493
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,4,128,1,float16,float16,0,2.3242239952087402
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,4,128,1,fp8,fp8,0,1.6443999608357747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,8,128,1,float16,float16,0,2.2908053398132324
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,8,128,1,float16,fp8,0,2.272319952646891
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,64,8,128,1,fp8,fp8,0,1.6578559875488281
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,1,128,1,fp8,fp8,0,14.323312123616537
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,1,128,1,float16,float16,0,20.98147710164388
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,1,128,1,float16,fp8,0,20.94708760579427
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,2,128,1,float16,float16,0,21.075653076171875
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,2,128,1,float16,fp8,0,20.622997283935547
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,2,128,1,fp8,fp8,0,14.287247975667318
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,4,128,1,float16,fp8,0,21.12174352010091
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,4,128,1,float16,float16,0,21.233253479003906
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,1,128,1,float16,float16,0,10.495967864990234
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,4,128,1,fp8,fp8,0,14.455280303955078
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,8,128,1,float16,float16,0,21.180741628011067
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,1,128,1,float16,fp8,0,10.375375747680664
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,8,128,1,float16,fp8,0,20.96841557820638
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,64,8,128,1,fp8,fp8,0,14.536730448404947
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,64,128,1,float16,float16,0,10.574239730834961
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,64,128,1,float16,fp8,0,10.329610824584961
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,64,128,1,fp8,fp8,0,7.632256189982097
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,1,128,1,fp8,fp8,0,7.153280258178711
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,2,128,1,float16,fp8,0,10.381584167480469
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,2,128,1,float16,float16,0,10.411834716796875
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,2,128,1,fp8,fp8,0,7.282554626464844
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,4,128,1,float16,float16,0,10.543226877848307
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,4,128,1,float16,fp8,0,10.47261873881022
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,4,128,1,fp8,fp8,0,7.119109471638997
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,8,128,1,float16,float16,0,10.63257090250651
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,8,128,1,float16,fp8,0,10.5228640238444
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,1,128,1,float16,float16,0,5.31385072072347
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,1,128,1,float16,fp8,0,4.889520009358724
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,64,8,128,1,fp8,fp8,0,7.183610916137695
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,1,128,1,fp8,fp8,0,3.566410700480143
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,2,128,1,float16,float16,0,5.05567995707194
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,64,128,1,float16,float16,0,5.258693377176921
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,64,128,1,float16,fp8,0,5.190789222717285
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,64,128,1,fp8,fp8,0,3.74729061126709
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,2,128,1,float16,fp8,0,5.0092213948567705
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,2,128,1,fp8,fp8,0,3.5357227325439453
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,4,128,1,float16,float16,0,5.026149431864421
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,4,128,1,float16,fp8,0,5.272453308105469
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,4,128,1,fp8,fp8,0,3.5298401514689126
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,8,128,1,float16,fp8,0,4.97383467356364
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,8,128,1,float16,float16,0,5.220239957173665
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,64,8,128,1,fp8,fp8,0,3.5572532018025718
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,1,128,1,float16,float16,0,2.616410732269287
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,1,128,1,float16,fp8,0,2.5725013415018716
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,64,128,1,float16,float16,0,2.592847983042399
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,1,128,1,fp8,fp8,0,1.858191967010498
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,2,128,1,float16,float16,0,2.613839944203695
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,64,128,1,float16,fp8,0,2.57205867767334
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,2,128,1,float16,fp8,0,2.583669344584147
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,2,128,1,fp8,fp8,0,1.866378625233968
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,4,128,1,float16,float16,0,2.625050703684489
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,64,128,1,fp8,fp8,0,1.924458662668864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,4,128,1,float16,fp8,0,2.5583413441975913
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,4,128,1,fp8,fp8,0,1.8637866973876953
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,8,128,1,float16,float16,0,2.6104000409444175
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,8,128,1,float16,fp8,0,2.552677313486735
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,64,8,128,1,fp8,fp8,0,1.8799626032511394
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,64,128,1,float16,float16,0,1.4195946057637532
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,64,128,1,float16,fp8,0,1.4024906158447266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,1,128,1,float16,float16,0,1.4109546343485515
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,64,128,1,fp8,fp8,0,1.0623892943064372
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,1,128,1,float16,fp8,0,1.3894079526265461
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,1,128,1,fp8,fp8,0,1.0417386690775554
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,2,128,1,float16,float16,0,1.4075573285420735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,2,128,1,float16,fp8,0,1.4005227088928223
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,2,128,1,fp8,fp8,0,1.0462079842885335
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,4,128,1,float16,float16,0,1.4216373761494954
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,4,128,1,fp8,fp8,0,1.0428266525268555
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,4,128,1,float16,fp8,0,1.389370600382487
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,8,128,1,float16,float16,0,1.4188586870829265
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,8,128,1,float16,fp8,0,1.3960107167561848
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,64,8,128,1,fp8,fp8,0,1.0469280083974202
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,1,128,1,float16,float16,0,20.072901407877605
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,1,128,1,fp8,fp8,0,14.421253204345703
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,1,128,1,float16,fp8,0,20.25644810994466
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,2,128,1,fp8,fp8,0,14.570640563964844
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,2,128,1,float16,fp8,0,20.237723032633465
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,2,128,1,float16,float16,0,20.053248087565105
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,4,128,1,float16,float16,0,20.3124262491862
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,4,128,1,float16,fp8,0,20.10598882039388
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,1,128,1,float16,float16,0,10.120208104451498
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,4,128,1,fp8,fp8,0,14.444992065429688
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,8,128,1,float16,float16,0,20.317338307698567
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,8,128,1,float16,fp8,0,20.330362955729168
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,64,8,128,1,fp8,fp8,0,14.780165354410807
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,1,128,1,float16,fp8,0,9.919951756795248
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,64,128,1,float16,fp8,0,10.404458363850912
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,64,128,1,float16,float16,0,10.43723169962565
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,1,128,1,fp8,fp8,0,7.148864110310872
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,64,128,1,fp8,fp8,0,7.563610712687175
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,2,128,1,float16,float16,0,10.164325078328451
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,2,128,1,float16,fp8,0,10.165269215901693
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,2,128,1,fp8,fp8,0,7.280368169148763
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,4,128,1,float16,float16,0,10.198970794677734
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,4,128,1,float16,fp8,0,10.054672241210938
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,4,128,1,fp8,fp8,0,7.0509599049886065
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,8,128,1,float16,float16,0,10.373765309651693
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,8,128,1,float16,fp8,0,10.174896240234375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,1,128,1,float16,float16,0,4.883077303568522
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,64,8,128,1,fp8,fp8,0,7.182949066162109
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,1,128,1,float16,fp8,0,4.718090693155925
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,64,128,1,float16,float16,0,5.103647867838542
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,1,128,1,fp8,fp8,0,3.5090773900349936
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,64,128,1,float16,fp8,0,4.978618621826172
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,2,128,1,float16,float16,0,4.896656036376953
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,2,128,1,float16,fp8,0,4.739578564961751
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,2,128,1,fp8,fp8,0,3.5040159225463867
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,4,128,1,float16,float16,0,4.93668270111084
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,4,128,1,float16,fp8,0,4.994640032450358
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,4,128,1,fp8,fp8,0,3.518474578857422
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,8,128,1,float16,float16,0,4.847119967142741
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,64,128,1,fp8,fp8,0,3.714405377705892
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,8,128,1,float16,fp8,0,4.768399874369304
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,64,8,128,1,fp8,fp8,0,3.5303627649943032
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,1,128,1,float16,float16,0,2.500341256459554
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,64,128,1,float16,float16,0,2.553071975708008
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,1,128,1,float16,fp8,0,2.434650739034017
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,1,128,1,fp8,fp8,0,1.8127466837565105
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,2,128,1,float16,float16,0,2.4784587224324546
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,64,128,1,float16,fp8,0,2.4957067171732583
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,2,128,1,float16,fp8,0,2.4296746253967285
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,2,128,1,fp8,fp8,0,1.8225119908650715
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,64,128,1,fp8,fp8,0,1.9607307116190593
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,4,128,1,float16,float16,0,2.4934825897216797
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,4,128,1,float16,fp8,0,2.4432266553243003
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,4,128,1,fp8,fp8,0,1.8210399945576985
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,8,128,1,float16,float16,0,2.4886666933695474
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,8,128,1,fp8,fp8,0,1.8373066584269206
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,64,8,128,1,float16,fp8,0,2.444197336832682
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,1,128,1,float16,float16,0,1.3142879803975422
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,64,128,1,float16,float16,0,1.342624028523763
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,64,128,1,float16,fp8,0,1.3272213141123455
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,1,128,1,float16,fp8,0,1.2830506960550945
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,1,128,1,fp8,fp8,0,0.9835466543833414
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,64,128,1,fp8,fp8,0,1.0389546553293865
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,2,128,1,float16,float16,0,1.3129013379414876
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,2,128,1,float16,fp8,0,1.2903467019399006
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,2,128,1,fp8,fp8,0,0.982426643371582
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,4,128,1,float16,float16,0,1.3256693681081135
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,4,128,1,float16,fp8,0,1.2996319929758708
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,8,128,1,float16,float16,0,1.3232053120930989
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,8,128,1,float16,fp8,0,1.3021759986877441
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,8,128,1,fp8,fp8,0,0.9929707050323486
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,64,4,128,1,fp8,fp8,0,0.9850666522979736
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,64,128,1,float16,float16,0,0.7367893060048422
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,64,128,1,float16,fp8,0,0.7291573683420817
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,1,128,1,float16,float16,0,0.7409226894378662
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,1,128,1,fp8,fp8,0,0.5675199826558431
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,2,128,1,float16,float16,0,0.7373653252919515
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,1,128,1,float16,fp8,0,0.7225066820780436
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,64,128,1,fp8,fp8,0,0.5919413169225057
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,2,128,1,fp8,fp8,0,0.5668906768163046
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,4,128,1,float16,float16,0,0.7378453413645426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,2,128,1,float16,fp8,0,0.7211200396219889
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,4,128,1,float16,fp8,0,0.7249120076497396
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,4,128,1,fp8,fp8,0,0.5694400072097778
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,8,128,1,float16,float16,0,0.7397066752115885
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,8,128,1,float16,fp8,0,0.7280373573303223
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,64,8,128,1,fp8,fp8,0,0.57314133644104
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,1,128,1,float16,float16,0,12.500500996907553
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,1,128,1,float16,fp8,0,12.45456059773763
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,1,128,1,fp8,fp8,0,8.77729606628418
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,2,128,1,float16,float16,0,12.497156778971354
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,2,128,1,float16,fp8,0,12.411460876464844
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,2,128,1,fp8,fp8,0,8.841237386067709
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,4,128,1,float16,float16,0,12.464164733886719
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,4,128,1,float16,fp8,0,12.48150889078776
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,1,128,1,float16,float16,0,6.12229855855306
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,4,128,1,fp8,fp8,0,9.023455937703451
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,8,128,1,fp8,fp8,0,9.11355717976888
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,8,128,1,float16,float16,0,12.803077697753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,1,128,1,float16,fp8,0,6.01692263285319
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,64,8,128,1,float16,fp8,0,12.211104075113932
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,64,128,1,float16,float16,0,6.412634531656901
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,64,128,1,float16,fp8,0,6.068949381510417
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,1,128,1,fp8,fp8,0,4.405690511067708
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,64,128,1,fp8,fp8,0,4.74348258972168
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,2,128,1,float16,float16,0,6.045786539713542
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,2,128,1,float16,fp8,0,6.062608083089192
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,2,128,1,fp8,fp8,0,4.41484260559082
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,4,128,1,float16,float16,0,5.865594863891602
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,4,128,1,float16,fp8,0,6.040869394938151
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,4,128,1,fp8,fp8,0,4.381775856018066
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,8,128,1,float16,float16,0,6.216869354248047
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,8,128,1,float16,fp8,0,5.851802825927734
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,1,128,1,float16,float16,0,3.0085067749023438
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,64,8,128,1,fp8,fp8,0,4.486821174621582
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,1,128,1,float16,fp8,0,2.9295358657836914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,1,128,1,fp8,fp8,0,2.223301410675049
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,64,128,1,float16,fp8,0,3.09931214650472
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,64,128,1,float16,float16,0,3.076016108194987
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,2,128,1,float16,float16,0,2.973087946573893
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,2,128,1,float16,fp8,0,2.928880055745443
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,2,128,1,fp8,fp8,0,2.237295945485433
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,4,128,1,float16,float16,0,2.9803520838419595
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,64,128,1,fp8,fp8,0,2.412090619405111
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,4,128,1,fp8,fp8,0,2.2409493128458657
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,4,128,1,float16,fp8,0,2.9120105107625327
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,8,128,1,float16,float16,0,2.9915733337402344
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,8,128,1,float16,fp8,0,2.936581293741862
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,64,8,128,1,fp8,fp8,0,2.2560532887776694
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,1,128,1,float16,float16,0,1.5525280634562175
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,1,128,1,float16,fp8,0,1.51038392384847
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,1,128,1,fp8,fp8,0,1.1682826677958171
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,64,128,1,float16,float16,0,1.5991466840108235
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,2,128,1,float16,float16,0,1.546160062154134
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,64,128,1,fp8,fp8,0,1.271392027537028
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,2,128,1,float16,fp8,0,1.5196800231933594
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,64,128,1,float16,fp8,0,1.5702133178710938
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,2,128,1,fp8,fp8,0,1.1756693522135417
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,4,128,1,float16,float16,0,1.547701358795166
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,4,128,1,fp8,fp8,0,1.1796693007151287
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,4,128,1,float16,fp8,0,1.5173813501993816
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,8,128,1,float16,float16,0,1.5550986925760906
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,8,128,1,float16,fp8,0,1.5170186360677083
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,64,8,128,1,fp8,fp8,0,1.1872533162434895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,1,128,1,float16,float16,0,0.8349653085072836
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,1,128,1,float16,fp8,0,0.8108320236206055
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,64,128,1,float16,float16,0,0.8449333508809408
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,64,128,1,float16,fp8,0,0.8446826934814453
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,1,128,1,fp8,fp8,0,0.6456586519877116
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,64,128,1,fp8,fp8,0,0.6884426275889078
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,2,128,1,float16,float16,0,0.8326826890309652
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,2,128,1,fp8,fp8,0,0.6478826602300009
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,2,128,1,float16,fp8,0,0.815770705540975
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,4,128,1,float16,float16,0,0.8324266274770101
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,4,128,1,float16,fp8,0,0.8137546380360922
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,4,128,1,fp8,fp8,0,0.6489280064900717
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,8,128,1,float16,float16,0,0.8375466664632162
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,8,128,1,fp8,fp8,0,0.6493546565373739
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,64,8,128,1,float16,fp8,0,0.8186879952748617
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,64,128,1,float16,fp8,0,0.47788798809051514
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,64,128,1,float16,float16,0,0.4840000073115031
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,64,128,1,fp8,fp8,0,0.3967946767807007
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,1,128,1,float16,float16,0,0.47912001609802246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,1,128,1,float16,fp8,0,0.468506654103597
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,1,128,1,fp8,fp8,0,0.36741332213083905
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,2,128,1,float16,float16,0,0.4786613384882609
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,2,128,1,float16,fp8,0,0.46697068214416504
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,2,128,1,fp8,fp8,0,0.36716266473134357
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,4,128,1,float16,float16,0,0.4827359914779663
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,4,128,1,float16,fp8,0,0.46691731611887616
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,4,128,1,fp8,fp8,0,0.36716798941294354
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,8,128,1,float16,float16,0,0.4825226863225301
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,8,128,1,float16,fp8,0,0.47229333718617755
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,64,8,128,1,fp8,fp8,0,0.3694506486256917
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,1,128,1,float16,fp8,0,12.722768147786459
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,1,128,1,fp8,fp8,0,9.579615910847982
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,1,128,1,float16,float16,0,12.94485346476237
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,2,128,1,float16,float16,0,13.008378346761068
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,2,128,1,fp8,fp8,0,9.651333491007486
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,4,128,1,float16,fp8,0,12.466064453125
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,4,128,1,float16,float16,0,12.75332260131836
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,2,128,1,float16,fp8,0,12.53429921468099
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,1,128,1,float16,float16,0,6.202890396118164
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,4,128,1,fp8,fp8,0,9.673632303873697
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,8,128,1,float16,float16,0,13.127189636230469
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,1,128,1,float16,fp8,0,6.113514582316081
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,8,128,1,float16,fp8,0,12.646527608235678
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,64,8,128,1,fp8,fp8,0,9.913125356038412
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,64,128,1,float16,float16,0,6.64790407816569
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,1,128,1,fp8,fp8,0,4.750250816345215
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,64,128,1,fp8,fp8,0,5.344010670979817
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,64,128,1,float16,fp8,0,6.532735824584961
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,2,128,1,float16,float16,0,6.392554601033528
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,2,128,1,fp8,fp8,0,4.764848073323567
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,2,128,1,float16,fp8,0,5.987354914347331
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,4,128,1,float16,float16,0,6.411104202270508
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,4,128,1,float16,fp8,0,6.163546880086263
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,4,128,1,fp8,fp8,0,4.7795413335164385
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,8,128,1,float16,float16,0,6.2626291910807295
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,1,128,1,float16,float16,0,3.057130813598633
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,8,128,1,float16,fp8,0,6.024138768513997
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,64,8,128,1,fp8,fp8,0,4.961450576782227
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,1,128,1,float16,fp8,0,2.9790614446004233
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,64,128,1,float16,float16,0,3.201589266459147
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,1,128,1,fp8,fp8,0,2.386319955190023
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,64,128,1,float16,fp8,0,3.168170611063639
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,64,128,1,fp8,fp8,0,2.6663519541422525
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,2,128,1,float16,float16,0,3.065418561299642
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,2,128,1,float16,fp8,0,2.958559989929199
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,2,128,1,fp8,fp8,0,2.39465061823527
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,4,128,1,float16,float16,0,3.0586185455322266
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,4,128,1,float16,fp8,0,2.971658706665039
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,4,128,1,fp8,fp8,0,2.4019039471944175
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,8,128,1,float16,float16,0,3.0590826670328775
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,8,128,1,float16,fp8,0,3.0103305180867515
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,64,8,128,1,fp8,fp8,0,2.426703929901123
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,1,128,1,float16,float16,0,1.5579466819763184
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,64,128,1,float16,float16,0,1.6259573300679524
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,1,128,1,float16,fp8,0,1.5259839693705242
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,64,128,1,float16,fp8,0,1.635306676228841
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,2,128,1,float16,float16,0,1.562346617380778
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,1,128,1,fp8,fp8,0,1.2361973126729329
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,64,128,1,fp8,fp8,0,1.3789706230163574
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,2,128,1,float16,fp8,0,1.531434694925944
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,2,128,1,fp8,fp8,0,1.2343626817067463
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,4,128,1,float16,float16,0,1.5625440279642742
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,4,128,1,float16,fp8,0,1.5304907162984211
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,4,128,1,fp8,fp8,0,1.2449866930643718
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,8,128,1,float16,float16,0,1.573856035868327
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,8,128,1,float16,fp8,0,1.5377440452575684
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,64,8,128,1,fp8,fp8,0,1.2593493461608887
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,1,128,1,float16,float16,0,0.8265386422475179
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,1,128,1,float16,fp8,0,0.8049173355102539
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,64,128,1,float16,float16,0,0.8626240094502767
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,64,128,1,float16,fp8,0,0.8457067012786865
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,1,128,1,fp8,fp8,0,0.6585919857025146
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,2,128,1,float16,float16,0,0.824293295542399
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,2,128,1,float16,fp8,0,0.8060159683227539
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,2,128,1,fp8,fp8,0,0.6634186506271362
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,64,128,1,fp8,fp8,0,0.7354506651560465
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,4,128,1,float16,float16,0,0.8320693174997965
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,4,128,1,float16,fp8,0,0.8089386622111002
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,4,128,1,fp8,fp8,0,0.6619413296381632
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,8,128,1,fp8,fp8,0,0.6701599756876627
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,8,128,1,float16,fp8,0,0.8103413581848145
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,64,8,128,1,float16,float16,0,0.8278826872507731
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,1,128,1,float16,float16,0,0.4575573205947876
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,64,128,1,float16,fp8,0,0.4598453442255656
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,64,128,1,float16,float16,0,0.4681333303451538
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,64,128,1,fp8,fp8,0,0.4053226709365845
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,1,128,1,float16,fp8,0,0.4434933265050252
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,1,128,1,fp8,fp8,0,0.37218133608500165
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,2,128,1,float16,float16,0,0.45740266640981037
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,2,128,1,float16,fp8,0,0.4436266819636027
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,2,128,1,fp8,fp8,0,0.37326399485270184
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,4,128,1,float16,float16,0,0.45766401290893555
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,4,128,1,float16,fp8,0,0.4482613404591878
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,4,128,1,fp8,fp8,0,0.3745013475418091
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,8,128,1,float16,float16,0,0.45837334791819256
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,8,128,1,float16,fp8,0,0.4484906593958537
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,64,8,128,1,fp8,fp8,0,0.3760746717453003
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,64,128,1,float16,float16,0,0.27596267064412433
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,1,128,1,float16,float16,0,0.27297067642211914
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,1,128,1,float16,fp8,0,0.26498132944107056
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,1,128,1,fp8,fp8,0,0.2148053248723348
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,64,128,1,float16,fp8,0,0.2741386691729228
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,64,128,1,fp8,fp8,0,0.23391467332839966
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,2,128,1,float16,float16,0,0.2723733385403951
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,2,128,1,float16,fp8,0,0.263589342435201
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,2,128,1,fp8,fp8,0,0.21408534049987793
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,4,128,1,float16,fp8,0,0.2664960026741028
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,4,128,1,float16,float16,0,0.2746613423029582
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,4,128,1,fp8,fp8,0,0.21504533290863037
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,8,128,1,float16,float16,0,0.2753653327624003
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,8,128,1,float16,fp8,0,0.26610134045283
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,64,8,128,1,fp8,fp8,0,0.21780800819396973
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,1,128,1,float16,float16,0,8.068373362223307
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,1,128,1,float16,fp8,0,8.00331179300944
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,1,128,1,fp8,fp8,0,6.401386896769206
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,2,128,1,fp8,fp8,0,6.432863871256511
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,2,128,1,float16,fp8,0,7.869690577189128
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,2,128,1,float16,float16,0,8.090037027994791
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,4,128,1,float16,float16,0,8.018143971761068
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,4,128,1,float16,fp8,0,7.925504048665364
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,1,128,1,float16,float16,0,3.9109226862589517
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,4,128,1,fp8,fp8,0,6.445770899454753
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,1,128,1,float16,fp8,0,3.7973546981811523
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,8,128,1,float16,fp8,0,8.096517562866211
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,8,128,1,float16,float16,0,8.210906982421875
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,64,8,128,1,fp8,fp8,0,6.508826573689778
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,64,128,1,float16,float16,0,4.169013341267903
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,64,128,1,float16,fp8,0,4.165679931640625
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,64,128,1,fp8,fp8,0,3.594170570373535
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,1,128,1,fp8,fp8,0,3.1862452824910483
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,2,128,1,float16,float16,0,3.923114776611328
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,2,128,1,float16,fp8,0,3.8249387741088867
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,2,128,1,fp8,fp8,0,3.1933228174845376
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,4,128,1,float16,float16,0,3.975621223449707
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,4,128,1,float16,fp8,0,3.789221445719401
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,4,128,1,fp8,fp8,0,3.2132320404052734
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,8,128,1,float16,float16,0,3.9275894165039062
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,8,128,1,float16,fp8,0,3.8150612513224282
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,64,8,128,1,fp8,fp8,0,3.2392587661743164
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,1,128,1,float16,float16,0,1.9667040506998699
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,1,128,1,float16,fp8,0,1.9120747248331706
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,1,128,1,fp8,fp8,0,1.6091519991556804
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,64,128,1,float16,float16,0,2.117530663808187
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,64,128,1,float16,fp8,0,2.0745760599772134
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,2,128,1,float16,float16,0,1.9689173698425293
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,64,128,1,fp8,fp8,0,1.8410986264546711
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,2,128,1,float16,fp8,0,1.9188799858093262
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,2,128,1,fp8,fp8,0,1.6165226300557454
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,4,128,1,float16,float16,0,1.979573408762614
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,4,128,1,float16,fp8,0,1.9216052691141765
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,4,128,1,fp8,fp8,0,1.633562723795573
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,8,128,1,float16,float16,0,1.981887976328532
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,8,128,1,float16,fp8,0,1.9323573112487793
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,1,128,1,float16,float16,0,1.0179626941680908
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,64,8,128,1,fp8,fp8,0,1.643898646036784
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,1,128,1,float16,fp8,0,0.9913600285847982
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,64,128,1,float16,fp8,0,1.0633440017700195
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,64,128,1,float16,float16,0,1.0856800079345703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,1,128,1,fp8,fp8,0,0.8378506501515707
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,2,128,1,float16,float16,0,1.0202986399332683
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,64,128,1,fp8,fp8,0,0.9497493108113607
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,2,128,1,float16,fp8,0,0.9918293158213297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,2,128,1,fp8,fp8,0,0.84771196047465
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,4,128,1,float16,float16,0,1.0273760159810383
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,4,128,1,float16,fp8,0,0.9957013130187988
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,4,128,1,fp8,fp8,0,0.8467893600463867
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,8,128,1,float16,float16,0,1.0258293151855469
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,8,128,1,float16,fp8,0,1.0009439786275227
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,64,8,128,1,fp8,fp8,0,0.8562880357106527
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,1,128,1,float16,float16,0,0.5443146626154581
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,1,128,1,float16,fp8,0,0.5290186802546183
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,64,128,1,float16,float16,0,0.5669600168863932
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,1,128,1,fp8,fp8,0,0.4535679817199707
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,64,128,1,fp8,fp8,0,0.5159573157628378
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,2,128,1,float16,float16,0,0.5451200008392334
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,64,128,1,float16,fp8,0,0.567408005396525
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,2,128,1,float16,fp8,0,0.5333280165990194
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,2,128,1,fp8,fp8,0,0.4547040065129598
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,4,128,1,float16,float16,0,0.5473173459370931
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,4,128,1,float16,fp8,0,0.5308533509572347
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,4,128,1,fp8,fp8,0,0.45653335253397626
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,8,128,1,float16,float16,0,0.5510346492131551
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,8,128,1,float16,fp8,0,0.5346133311589559
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,64,8,128,1,fp8,fp8,0,0.46306665738423664
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,64,128,1,float16,float16,0,0.3189813296000163
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,1,128,1,float16,float16,0,0.3059999942779541
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,1,128,1,float16,fp8,0,0.29637332757314044
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,64,128,1,float16,fp8,0,0.31353600819905597
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,1,128,1,fp8,fp8,0,0.2486613392829895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,2,128,1,float16,float16,0,0.30827200412750244
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,64,128,1,fp8,fp8,0,0.28672534227371216
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,2,128,1,float16,fp8,0,0.29920534292856854
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,2,128,1,fp8,fp8,0,0.24988800287246704
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,4,128,1,float16,float16,0,0.30826665957768756
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,4,128,1,float16,fp8,0,0.29896533489227295
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,4,128,1,fp8,fp8,0,0.24969067176183066
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,8,128,1,float16,float16,0,0.31091199318567914
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,8,128,1,fp8,fp8,0,0.25200533866882324
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,64,8,128,1,float16,fp8,0,0.30080533027648926
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,64,128,1,float16,float16,0,0.18675732612609863
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,1,128,1,float16,float16,0,0.17548267046610513
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,64,128,1,fp8,fp8,0,0.16632533073425293
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,64,128,1,float16,fp8,0,0.1827253301938375
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,1,128,1,float16,fp8,0,0.17029333114624023
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,1,128,1,fp8,fp8,0,0.15382400155067444
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,2,128,1,float16,float16,0,0.17661333084106445
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,2,128,1,float16,fp8,0,0.16983999808629355
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,2,128,1,fp8,fp8,0,0.15335466464360556
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,4,128,1,float16,float16,0,0.17654933532079062
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,4,128,1,float16,fp8,0,0.17071467638015747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,4,128,1,fp8,fp8,0,0.15390400091807047
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,8,128,1,float16,float16,0,0.17757334311803183
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,8,128,1,float16,fp8,0,0.17149333159128824
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,64,8,128,1,fp8,fp8,0,0.1550826629002889
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,1,128,1,float16,float16,0,7.881584167480469
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,1,128,1,fp8,fp8,0,6.3590240478515625
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,1,128,1,float16,fp8,0,7.821584065755208
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,2,128,1,float16,float16,0,7.702655792236328
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,2,128,1,fp8,fp8,0,6.384543736775716
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,2,128,1,float16,fp8,0,7.946874618530273
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,4,128,1,float16,float16,0,8.011311848958334
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,4,128,1,float16,fp8,0,7.798437118530273
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,1,128,1,float16,float16,0,3.7207199732462564
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,4,128,1,fp8,fp8,0,6.4783891042073565
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,1,128,1,float16,fp8,0,3.688762664794922
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,8,128,1,float16,float16,0,8.137637456258139
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,8,128,1,fp8,fp8,0,6.543781280517578
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,64,8,128,1,float16,fp8,0,8.07144546508789
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,64,128,1,fp8,fp8,0,3.7020746866861978
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,1,128,1,fp8,fp8,0,3.158538818359375
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,64,128,1,float16,fp8,0,4.373461405436198
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,64,128,1,float16,float16,0,4.47594674428304
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,2,128,1,float16,float16,0,3.7966667811075845
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,2,128,1,fp8,fp8,0,3.17305596669515
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,2,128,1,float16,fp8,0,3.857503890991211
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,4,128,1,float16,float16,0,3.8334506352742515
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,4,128,1,float16,fp8,0,3.8636531829833984
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,4,128,1,fp8,fp8,0,3.2138293584187827
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,8,128,1,fp8,fp8,0,3.2616373697916665
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,8,128,1,float16,fp8,0,3.901418685913086
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,64,8,128,1,float16,float16,0,3.900416056315104
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,1,128,1,float16,float16,0,1.8558239936828613
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,1,128,1,float16,fp8,0,1.8512585957845051
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,64,128,1,float16,float16,0,2.2028959592183432
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,64,128,1,float16,fp8,0,2.1603946685791016
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,1,128,1,fp8,fp8,0,1.5821866989135742
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,64,128,1,fp8,fp8,0,1.86626132329305
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,2,128,1,float16,fp8,0,1.865781307220459
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,2,128,1,float16,float16,0,1.8542292912801106
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,2,128,1,fp8,fp8,0,1.5895466804504395
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,4,128,1,float16,float16,0,1.883845329284668
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,4,128,1,float16,fp8,0,1.8962027231852214
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,4,128,1,fp8,fp8,0,1.6073493957519531
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,8,128,1,float16,float16,0,1.893594741821289
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,8,128,1,float16,fp8,0,1.9280799229939778
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,1,128,1,float16,float16,0,0.9401013056437174
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,64,8,128,1,fp8,fp8,0,1.6324106852213542
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,1,128,1,float16,fp8,0,0.9399413267771403
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,64,128,1,float16,float16,0,1.0905226866404216
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,64,128,1,float16,fp8,0,1.0762826601664226
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,1,128,1,fp8,fp8,0,0.7869706948598226
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,2,128,1,float16,float16,0,0.9431626796722412
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,2,128,1,fp8,fp8,0,0.7963146368662516
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,2,128,1,float16,fp8,0,0.9425013065338135
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,64,128,1,fp8,fp8,0,0.9272747039794922
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,4,128,1,float16,float16,0,0.9474026362101237
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,4,128,1,float16,fp8,0,0.9498026371002197
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,4,128,1,fp8,fp8,0,0.8184800148010254
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,8,128,1,float16,float16,0,0.9578346411387125
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,8,128,1,float16,fp8,0,0.9573226769765218
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,64,8,128,1,fp8,fp8,0,0.829194704691569
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,1,128,1,float16,float16,0,0.48389867941538495
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,1,128,1,float16,fp8,0,0.4875093301137288
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,64,128,1,float16,float16,0,0.5538026491800944
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,1,128,1,fp8,fp8,0,0.4033333460489909
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,64,128,1,float16,fp8,0,0.5424533287684122
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,64,128,1,fp8,fp8,0,0.4723466634750366
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,2,128,1,float16,float16,0,0.48655466238657635
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,2,128,1,float16,fp8,0,0.4878079891204834
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,2,128,1,fp8,fp8,0,0.4025599956512451
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,4,128,1,float16,float16,0,0.4870186646779378
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,4,128,1,float16,fp8,0,0.4864159822463989
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,4,128,1,fp8,fp8,0,0.4087413152058919
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,8,128,1,float16,fp8,0,0.49164267381032306
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,8,128,1,float16,float16,0,0.49079465866088867
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,1,128,1,float16,float16,0,0.2536533276240031
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,64,8,128,1,fp8,fp8,0,0.41730133692423504
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,1,128,1,float16,fp8,0,0.25599465767542523
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,64,128,1,float16,fp8,0,0.2855093280474345
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,64,128,1,float16,float16,0,0.2937493324279785
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,1,128,1,fp8,fp8,0,0.2116746703783671
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,2,128,1,float16,float16,0,0.25643734137217206
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,64,128,1,fp8,fp8,0,0.24794133504231772
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,2,128,1,float16,fp8,0,0.25552000602086383
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,2,128,1,fp8,fp8,0,0.2143519918123881
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,4,128,1,float16,float16,0,0.25833600759506226
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,4,128,1,float16,fp8,0,0.25789332389831543
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,4,128,1,fp8,fp8,0,0.2148373325665792
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,8,128,1,float16,float16,0,0.25887467463811237
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,8,128,1,fp8,fp8,0,0.21727999051411948
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,64,8,128,1,float16,fp8,0,0.2597973346710205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,64,128,1,float16,float16,0,0.1588213344415029
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,64,128,1,float16,fp8,0,0.1569493313630422
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,1,128,1,float16,float16,0,0.13939733306566873
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,1,128,1,float16,fp8,0,0.13985600074132284
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,64,128,1,fp8,fp8,0,0.1280693312486013
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,1,128,1,fp8,fp8,0,0.10820266604423523
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,2,128,1,float16,float16,0,0.1409119963645935
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,2,128,1,float16,fp8,0,0.14013866583506265
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,2,128,1,fp8,fp8,0,0.10807999968528748
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,4,128,1,float16,float16,0,0.14173332850138345
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,4,128,1,float16,fp8,0,0.1411626636981964
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,4,128,1,fp8,fp8,0,0.10984533031781514
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,8,128,1,float16,float16,0,0.1431893308957418
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,8,128,1,float16,fp8,0,0.1422879993915558
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,64,8,128,1,fp8,fp8,0,0.11161599556605022
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,64,128,1,float16,float16,0,0.08467732866605122
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,1,128,1,float16,float16,0,0.07272000114123027
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,64,128,1,float16,fp8,0,0.08251200119654338
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,1,128,1,float16,fp8,0,0.07372266550858815
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,1,128,1,fp8,fp8,0,0.059290667374928795
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,64,128,1,fp8,fp8,0,0.07251733541488647
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,2,128,1,float16,float16,0,0.07297599812348683
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,2,128,1,float16,fp8,0,0.07316799958546956
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,2,128,1,fp8,fp8,0,0.06031466523806254
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,4,128,1,float16,float16,0,0.07420800129572551
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,4,128,1,float16,fp8,0,0.07434666653474171
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,4,128,1,fp8,fp8,0,0.06089599927266439
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,8,128,1,float16,float16,0,0.07507200042406718
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,8,128,1,float16,fp8,0,0.07473599910736084
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,64,8,128,1,fp8,fp8,0,0.06204266846179962
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,1,128,1,float16,float16,0,5.9703413645426435
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,1,128,1,float16,fp8,0,5.884837468465169
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,1,128,1,fp8,fp8,0,5.297002792358398
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,2,128,1,fp8,fp8,0,5.31497065226237
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,2,128,1,float16,float16,0,5.931834538777669
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,4,128,1,float16,float16,0,5.99507204691569
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,4,128,1,float16,fp8,0,5.966933568318685
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,2,128,1,float16,fp8,0,5.913904190063477
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,1,128,1,float16,float16,0,2.885045369466146
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,4,128,1,fp8,fp8,0,5.414613087972005
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,1,128,1,float16,fp8,0,2.875055948893229
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,8,128,1,float16,float16,0,6.080053329467773
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,8,128,1,fp8,fp8,0,5.475866953531901
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,64,8,128,1,float16,fp8,0,6.061194737752278
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,64,128,1,float16,float16,0,3.61898136138916
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,64,128,1,fp8,fp8,0,3.180858612060547
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,64,128,1,float16,fp8,0,3.5371198654174805
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,1,128,1,fp8,fp8,0,2.6321919759114585
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,2,128,1,float16,fp8,0,2.906709353129069
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,2,128,1,float16,float16,0,2.9235572814941406
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,2,128,1,fp8,fp8,0,2.649600028991699
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,4,128,1,float16,float16,0,2.985194524129232
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,4,128,1,float16,fp8,0,2.969498634338379
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,4,128,1,fp8,fp8,0,2.6898934046427407
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,8,128,1,float16,float16,0,3.0298986434936523
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,8,128,1,float16,fp8,0,3.0188159942626953
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,1,128,1,float16,float16,0,1.4383039474487305
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,64,8,128,1,fp8,fp8,0,2.730245272318522
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,1,128,1,float16,fp8,0,1.4355039596557617
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,1,128,1,fp8,fp8,0,1.3174346288045247
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,64,128,1,float16,fp8,0,1.7597546577453613
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,2,128,1,float16,float16,0,1.449845314025879
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,64,128,1,float16,float16,0,1.8137280146280925
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,64,128,1,fp8,fp8,0,1.5930719375610352
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,2,128,1,float16,fp8,0,1.440922737121582
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,2,128,1,fp8,fp8,0,1.3127520084381104
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,4,128,1,float16,float16,0,1.4461119969685872
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,4,128,1,float16,fp8,0,1.4674827257792156
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,4,128,1,fp8,fp8,0,1.3395573298136394
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,8,128,1,float16,float16,0,1.4761974016825359
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,8,128,1,float16,fp8,0,1.4803412755330403
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,64,8,128,1,fp8,fp8,0,1.357279936472575
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,1,128,1,float16,float16,0,0.7315893173217773
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,64,128,1,float16,float16,0,0.8981760342915853
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,1,128,1,float16,fp8,0,0.7297493616739908
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,1,128,1,fp8,fp8,0,0.6495253245035807
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,2,128,1,float16,float16,0,0.7307466665903727
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,64,128,1,float16,fp8,0,0.8786133130391439
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,64,128,1,fp8,fp8,0,0.7929333051045736
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,2,128,1,float16,fp8,0,0.7323199907938639
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,2,128,1,fp8,fp8,0,0.6542719999949137
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,4,128,1,float16,float16,0,0.7344853083292643
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,4,128,1,float16,fp8,0,0.7379946708679199
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,4,128,1,fp8,fp8,0,0.6742933591206869
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,8,128,1,float16,float16,0,0.7448533376057943
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,1,128,1,float16,float16,0,0.3766453266143799
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,8,128,1,float16,fp8,0,0.74617600440979
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,64,8,128,1,fp8,fp8,0,0.6849173704783121
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,1,128,1,float16,fp8,0,0.3765546480814616
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,64,128,1,float16,float16,0,0.4533119996388753
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,1,128,1,fp8,fp8,0,0.33368531862894696
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,64,128,1,float16,fp8,0,0.44198401769002277
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,2,128,1,float16,float16,0,0.3762720028559367
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,2,128,1,float16,fp8,0,0.37668267885843915
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,64,128,1,fp8,fp8,0,0.40381332238515216
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,2,128,1,fp8,fp8,0,0.3335946798324585
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,4,128,1,float16,float16,0,0.37992000579833984
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,4,128,1,float16,fp8,0,0.37768534819285077
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,4,128,1,fp8,fp8,0,0.3383466800053914
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,8,128,1,float16,float16,0,0.3834933439890544
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,8,128,1,float16,fp8,0,0.38326934973398846
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,64,8,128,1,fp8,fp8,0,0.34322134653727215
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,64,128,1,float16,float16,0,0.23907732963562012
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,64,128,1,float16,fp8,0,0.23233066002527872
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,1,128,1,float16,float16,0,0.1975626746813456
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,1,128,1,float16,fp8,0,0.19993066787719727
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,1,128,1,fp8,fp8,0,0.17731734116872153
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,64,128,1,fp8,fp8,0,0.21156799793243408
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,2,128,1,float16,float16,0,0.1991893251736959
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,2,128,1,float16,fp8,0,0.1988640030225118
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,2,128,1,fp8,fp8,0,0.17640000581741333
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,4,128,1,float16,float16,0,0.1990399956703186
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,4,128,1,float16,fp8,0,0.20033599932988486
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,4,128,1,fp8,fp8,0,0.1782133380572001
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,8,128,1,float16,float16,0,0.2023413379987081
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,8,128,1,float16,fp8,0,0.20332266887029013
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,64,8,128,1,fp8,fp8,0,0.1808533271153768
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,64,128,1,float16,float16,0,0.13079999883969626
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,1,128,1,float16,float16,0,0.10829333464304607
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,1,128,1,float16,fp8,0,0.10891733566919963
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,64,128,1,float16,fp8,0,0.12745066483815512
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,1,128,1,fp8,fp8,0,0.09072533249855042
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,64,128,1,fp8,fp8,0,0.11266133189201355
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,2,128,1,float16,float16,0,0.10831999778747559
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,2,128,1,float16,fp8,0,0.10903466741243999
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,2,128,1,fp8,fp8,0,0.09115733702977498
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,4,128,1,float16,float16,0,0.11040000120798747
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,4,128,1,float16,fp8,0,0.10975999633471172
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,4,128,1,fp8,fp8,0,0.09178133805592854
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,8,128,1,float16,float16,0,0.11126933495203654
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,8,128,1,float16,fp8,0,0.11076266566912334
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,64,8,128,1,fp8,fp8,0,0.09397866328557332
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,1,128,1,float16,float16,0,0.06070399781068166
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,64,128,1,float16,float16,0,0.073253333568573
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,1,128,1,float16,fp8,0,0.06026133398214976
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,64,128,1,float16,fp8,0,0.07155199845631917
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,64,128,1,fp8,fp8,0,0.06385600070158641
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,1,128,1,fp8,fp8,0,0.05100266635417938
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,2,128,1,float16,float16,0,0.060378665725390114
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,2,128,1,float16,fp8,0,0.06058133145173391
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,2,128,1,fp8,fp8,0,0.0505920002857844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,4,128,1,float16,float16,0,0.06069866816202799
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,4,128,1,float16,fp8,0,0.06148266792297363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,4,128,1,fp8,fp8,0,0.05225066840648651
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,8,128,1,float16,float16,0,0.06181333462397257
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,8,128,1,float16,fp8,0,0.06144000093142191
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,64,8,128,1,fp8,fp8,0,0.05415999889373779
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,64,128,1,float16,float16,0,0.040175999204317726
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,1,128,1,float16,float16,0,0.03563733398914337
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,64,128,1,float16,fp8,0,0.03948266555865606
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,1,128,1,float16,fp8,0,0.03565333286921183
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,64,128,1,fp8,fp8,0,0.0378560001651446
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,2,128,1,float16,fp8,0,0.035760000348091125
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,1,128,1,fp8,fp8,0,0.03201066702604294
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,2,128,1,float16,float16,0,0.03572266548871994
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,2,128,1,fp8,fp8,0,0.032058666149775185
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,4,128,1,float16,float16,0,0.036389333506425224
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,4,128,1,float16,fp8,0,0.036559998989105225
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,4,128,1,fp8,fp8,0,0.03398933261632919
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,8,128,1,float16,float16,0,0.03645866612593333
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,8,128,1,float16,fp8,0,0.036650667587916054
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,64,8,128,1,fp8,fp8,0,0.0340639998515447
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,1,128,1,float16,float16,0,2.496986707051595
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,1,128,1,float16,fp8,0,2.4971413612365723
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,1,128,1,fp8,fp8,0,2.139616012573242
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,2,128,1,float16,float16,0,2.5034772555033364
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,2,128,1,float16,fp8,0,2.478490670522054
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,2,128,1,fp8,fp8,0,2.151696046193441
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,4,128,1,float16,fp8,0,2.5432960192362466
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,4,128,1,float16,float16,0,2.5532479286193848
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,1,128,1,float16,float16,0,1.2283519903818767
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,4,128,1,fp8,fp8,0,2.1919520696004233
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,8,128,1,float16,float16,0,2.589973290761312
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,1,128,1,float16,fp8,0,1.2254880269368489
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,8,128,1,float16,fp8,0,2.604405403137207
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,64,8,128,1,fp8,fp8,0,2.2588747342427573
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,64,128,1,float16,fp8,0,1.563792069753011
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,64,128,1,float16,float16,0,1.6162452697753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,64,128,1,fp8,fp8,0,1.3707253138224285
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,1,128,1,fp8,fp8,0,1.0726666450500488
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,2,128,1,float16,float16,0,1.247045358022054
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,2,128,1,float16,fp8,0,1.232805331548055
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,2,128,1,fp8,fp8,0,1.073247989018758
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,4,128,1,float16,float16,0,1.239792029062907
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,4,128,1,float16,fp8,0,1.2542400360107422
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,4,128,1,fp8,fp8,0,1.0954453150431316
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,8,128,1,float16,float16,0,1.2731573581695557
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,8,128,1,float16,fp8,0,1.2816853523254395
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,1,128,1,float16,float16,0,0.6215573151906332
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,64,8,128,1,fp8,fp8,0,1.130346695582072
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,1,128,1,float16,fp8,0,0.6230560143788656
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,64,128,1,float16,float16,0,0.8035360177357992
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,64,128,1,float16,fp8,0,0.7759520212809244
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,1,128,1,fp8,fp8,0,0.5262826681137085
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,64,128,1,fp8,fp8,0,0.6832160154978434
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,2,128,1,float16,float16,0,0.6253973245620728
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,2,128,1,float16,fp8,0,0.626362681388855
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,2,128,1,fp8,fp8,0,0.5366559823354086
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,4,128,1,float16,float16,0,0.6293653249740601
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,4,128,1,float16,fp8,0,0.6286293268203735
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,4,128,1,fp8,fp8,0,0.5522559881210327
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,8,128,1,float16,float16,0,0.6335200071334839
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,8,128,1,float16,fp8,0,0.6365706523259481
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,64,8,128,1,fp8,fp8,0,0.5718239943186442
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,1,128,1,float16,float16,0,0.32105066378911334
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,1,128,1,float16,fp8,0,0.3197493354479472
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,1,128,1,fp8,fp8,0,0.27264533440272015
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,64,128,1,float16,fp8,0,0.39242132504781085
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,64,128,1,float16,float16,0,0.40518399079640705
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,64,128,1,fp8,fp8,0,0.350597341855367
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,2,128,1,float16,float16,0,0.3221333424250285
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,2,128,1,float16,fp8,0,0.3221120039621989
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,2,128,1,fp8,fp8,0,0.2741386691729228
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,4,128,1,float16,float16,0,0.3227679928143819
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,4,128,1,float16,fp8,0,0.3228106697400411
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,4,128,1,fp8,fp8,0,0.2800266742706299
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,8,128,1,float16,fp8,0,0.32705066601435345
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,8,128,1,float16,float16,0,0.3264960050582886
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,64,8,128,1,fp8,fp8,0,0.2853493293126424
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,64,128,1,float16,float16,0,0.21423999468485513
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,1,128,1,float16,float16,0,0.1695093313852946
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,1,128,1,float16,fp8,0,0.16991466283798218
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,64,128,1,float16,fp8,0,0.20627733071645102
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,1,128,1,fp8,fp8,0,0.14573867122332254
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,2,128,1,float16,float16,0,0.16952000061670938
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,64,128,1,fp8,fp8,0,0.18445332845052084
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,2,128,1,float16,fp8,0,0.17072532574335733
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,2,128,1,fp8,fp8,0,0.1471519966920217
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,4,128,1,float16,float16,0,0.17054933309555054
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,4,128,1,float16,fp8,0,0.17190933227539062
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,4,128,1,fp8,fp8,0,0.14761066436767578
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,8,128,1,float16,float16,0,0.17243733008702597
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,8,128,1,float16,fp8,0,0.17306667566299438
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,64,8,128,1,fp8,fp8,0,0.15198933084805807
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,1,128,1,float16,float16,0,0.09308800101280212
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,1,128,1,float16,fp8,0,0.09305066863695781
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,64,128,1,float16,float16,0,0.11680533488591512
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,64,128,1,float16,fp8,0,0.11351999640464783
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,1,128,1,fp8,fp8,0,0.08171733220418294
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,64,128,1,fp8,fp8,0,0.10513599713643391
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,2,128,1,float16,float16,0,0.09331199526786804
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,2,128,1,float16,fp8,0,0.0932426651318868
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,2,128,1,fp8,fp8,0,0.08181333541870117
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,4,128,1,float16,fp8,0,0.09405866265296936
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,4,128,1,float16,float16,0,0.09345066547393799
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,4,128,1,fp8,fp8,0,0.08317333459854126
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,8,128,1,float16,float16,0,0.09553600351015727
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,8,128,1,float16,fp8,0,0.09526399771372478
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,64,8,128,1,fp8,fp8,0,0.08452799916267395
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,64,128,1,float16,float16,0,0.07072533170382182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,64,128,1,float16,fp8,0,0.06838933130105336
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,1,128,1,float16,float16,0,0.053583999474843345
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,1,128,1,float16,fp8,0,0.05499733487764994
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,2,128,1,float16,float16,0,0.0544053316116333
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,64,128,1,fp8,fp8,0,0.05955199897289276
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,1,128,1,fp8,fp8,0,0.04636266827583313
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,2,128,1,float16,fp8,0,0.054618666569391884
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,4,128,1,float16,float16,0,0.054757331808408104
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,2,128,1,fp8,fp8,0,0.04595200220743815
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,4,128,1,float16,fp8,0,0.054917335510253906
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,8,128,1,float16,float16,0,0.05569600065549215
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,4,128,1,fp8,fp8,0,0.0469760000705719
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,8,128,1,fp8,fp8,0,0.04849599798520406
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,64,8,128,1,float16,fp8,0,0.05583466589450836
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,64,128,1,float16,float16,0,0.036474667489528656
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,1,128,1,float16,float16,0,0.032602667808532715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,64,128,1,fp8,fp8,0,0.03570666660865148
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,64,128,1,float16,fp8,0,0.0369759996732076
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,1,128,1,fp8,fp8,0,0.029546665648619335
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,1,128,1,float16,fp8,0,0.03277866790692011
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,2,128,1,float16,float16,0,0.032655999064445496
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,2,128,1,float16,fp8,0,0.033029332756996155
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,2,128,1,fp8,fp8,0,0.02943466603755951
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,4,128,1,float16,float16,0,0.0332640012105306
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,4,128,1,float16,fp8,0,0.03363733241955439
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,4,128,1,fp8,fp8,0,0.031530665854612984
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,8,128,1,float16,float16,0,0.03369600077470144
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,8,128,1,fp8,fp8,0,0.0317493329445521
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,64,128,1,float16,fp8,0,0.02407466620206833
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,64,8,128,1,float16,fp8,0,0.03430933256944021
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,64,128,1,float16,float16,0,0.024117333193620045
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,64,128,1,fp8,fp8,0,0.022890667120615642
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,1,128,1,float16,float16,0,0.022346665461858112
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,1,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,2,128,1,float16,float16,0,0.02254933367172877
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,1,128,1,float16,fp8,0,0.022821334501107533
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,2,128,1,float16,fp8,0,0.022634667654832203
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,4,128,1,float16,float16,0,0.022895999252796173
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,2,128,1,fp8,fp8,0,0.020960000654061634
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,4,128,1,float16,fp8,0,0.023221333821614582
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,4,128,1,fp8,fp8,0,0.021669333179791767
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,8,128,1,float16,float16,0,0.022800001005331676
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,8,128,1,float16,fp8,0,0.0232640008131663
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,64,8,128,1,fp8,fp8,0,0.022122666239738464
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,1,128,1,float16,float16,0,1.1242187023162842
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,1,128,1,fp8,fp8,0,1.066981315612793
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,2,128,1,float16,float16,0,1.127557357152303
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,2,128,1,float16,fp8,0,1.1270453135172527
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,1,128,1,float16,fp8,0,1.1206506888071697
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,4,128,1,float16,float16,0,1.144704023996989
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,4,128,1,float16,fp8,0,1.1552746295928955
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,2,128,1,fp8,fp8,0,1.0717120170593262
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,1,128,1,float16,float16,0,0.5718666712443033
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,4,128,1,fp8,fp8,0,1.1000479857126872
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,8,128,1,float16,float16,0,1.151530663172404
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,8,128,1,fp8,fp8,0,1.1309599876403809
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,64,8,128,1,float16,fp8,0,1.172869364420573
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,1,128,1,float16,fp8,0,0.5725119908650717
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,64,128,1,float16,fp8,0,0.7265066305796305
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,64,128,1,float16,float16,0,0.7548800309499105
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,64,128,1,fp8,fp8,0,0.6816159884134928
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,1,128,1,fp8,fp8,0,0.5265013376871744
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,2,128,1,float16,float16,0,0.5718400080998739
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,2,128,1,float16,fp8,0,0.5722560087839762
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,2,128,1,fp8,fp8,0,0.5314079920450846
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,4,128,1,float16,float16,0,0.5773919820785522
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,4,128,1,float16,fp8,0,0.5777386824289957
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,4,128,1,fp8,fp8,0,0.5508586565653483
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,8,128,1,float16,float16,0,0.583845337231954
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,8,128,1,float16,fp8,0,0.5834720134735107
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,64,8,128,1,fp8,fp8,0,0.5708426634470621
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,1,128,1,float16,fp8,0,0.2962399919827779
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,1,128,1,float16,float16,0,0.29579732815424603
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,1,128,1,fp8,fp8,0,0.2737920085589091
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,2,128,1,float16,float16,0,0.29732267061869305
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,64,128,1,float16,fp8,0,0.37725333372751874
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,64,128,1,float16,float16,0,0.3904053370157878
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,2,128,1,float16,fp8,0,0.2969226638476054
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,64,128,1,fp8,fp8,0,0.35023999214172363
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,2,128,1,fp8,fp8,0,0.2746293346087138
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,4,128,1,float16,float16,0,0.29839466015497845
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,4,128,1,float16,fp8,0,0.2988746762275696
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,4,128,1,fp8,fp8,0,0.27853866418202716
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,8,128,1,float16,float16,0,0.30295999844868976
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,8,128,1,float16,fp8,0,0.3025173346201579
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,1,128,1,float16,float16,0,0.15797866384188333
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,64,8,128,1,fp8,fp8,0,0.2847573359807332
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,1,128,1,float16,fp8,0,0.15803733468055725
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,1,128,1,fp8,fp8,0,0.14517333110173544
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,2,128,1,float16,float16,0,0.15759999553362528
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,64,128,1,float16,float16,0,0.21734933058420816
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,64,128,1,float16,fp8,0,0.2037066618601481
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,64,128,1,fp8,fp8,0,0.1829813321431478
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,2,128,1,float16,fp8,0,0.1590079963207245
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,2,128,1,fp8,fp8,0,0.14603733023007712
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,4,128,1,float16,float16,0,0.16006933649381003
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,4,128,1,float16,fp8,0,0.15927466750144958
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,4,128,1,fp8,fp8,0,0.14753599961598715
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,8,128,1,float16,float16,0,0.1628159979979197
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,8,128,1,float16,fp8,0,0.16247999668121338
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,64,8,128,1,fp8,fp8,0,0.15151466925938925
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,64,128,1,float16,float16,0,0.11858133474985759
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,1,128,1,float16,float16,0,0.0883786678314209
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,1,128,1,float16,fp8,0,0.0883733332157135
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,64,128,1,float16,fp8,0,0.1132586697737376
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,1,128,1,fp8,fp8,0,0.08145066599051158
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,2,128,1,float16,float16,0,0.0886240005493164
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,64,128,1,fp8,fp8,0,0.10412266850471497
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,2,128,1,float16,fp8,0,0.08854933579762776
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,2,128,1,fp8,fp8,0,0.08181866506735484
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,4,128,1,float16,float16,0,0.087909330924352
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,4,128,1,float16,fp8,0,0.08842133482297261
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,4,128,1,fp8,fp8,0,0.0816480020682017
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,8,128,1,float16,float16,0,0.09031466643015544
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,8,128,1,float16,fp8,0,0.0899786651134491
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,64,8,128,1,fp8,fp8,0,0.0841919978459676
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,1,128,1,float16,float16,0,0.050944000482559204
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,64,128,1,float16,fp8,0,0.06624533236026764
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,64,128,1,float16,float16,0,0.06810133159160614
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,1,128,1,float16,fp8,0,0.05080533524354299
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,1,128,1,fp8,fp8,0,0.046122665206591286
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,2,128,1,float16,float16,0,0.050810664892196655
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,64,128,1,fp8,fp8,0,0.05996266504128774
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,2,128,1,float16,fp8,0,0.051125332713127136
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,2,128,1,fp8,fp8,0,0.045909335215886436
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,4,128,1,float16,float16,0,0.05142400165398916
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,4,128,1,float16,fp8,0,0.05124799907207489
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,8,128,1,float16,float16,0,0.051925331354141235
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,8,128,1,fp8,fp8,0,0.048138668139775596
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,64,128,1,float16,float16,0,0.035189333061377205
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,8,128,1,float16,fp8,0,0.052757332722345986
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,64,4,128,1,fp8,fp8,0,0.04677866895993551
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,1,128,1,float16,float16,0,0.030954666435718536
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,64,128,1,float16,fp8,0,0.03533333291610082
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,1,128,1,float16,fp8,0,0.03155199935038885
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,64,128,1,fp8,fp8,0,0.0354666660229365
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,1,128,1,fp8,fp8,0,0.029066666960716248
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,2,128,1,float16,float16,0,0.031301334500312805
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,2,128,1,float16,fp8,0,0.03156266609827677
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,2,128,1,fp8,fp8,0,0.02940800040960312
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,4,128,1,float16,float16,0,0.03183999905983607
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,4,128,1,float16,fp8,0,0.03219199925661087
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,4,128,1,fp8,fp8,0,0.031504000226656594
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,8,128,1,float16,float16,0,0.03188266605138779
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,8,128,1,float16,fp8,0,0.032511999209721885
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,64,8,128,1,fp8,fp8,0,0.03153600047032038
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,64,128,1,float16,float16,0,0.022783999641736347
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,1,128,1,float16,fp8,0,0.02176533391078313
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,1,128,1,float16,float16,0,0.02163733293612798
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,64,128,1,fp8,fp8,0,0.022917332748572033
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,2,128,1,float16,float16,0,0.021407999098300934
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,1,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,2,128,1,float16,fp8,0,0.02195200075705846
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,2,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,4,128,1,float16,float16,0,0.021925332645575207
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,4,128,1,float16,fp8,0,0.02231466770172119
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,4,128,1,fp8,fp8,0,0.022122666239738464
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,8,128,1,float16,float16,0,0.022272000710169475
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,8,128,1,float16,fp8,0,0.022410665949185688
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,64,128,1,float16,float16,0,0.018677332748969395
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,64,8,128,1,fp8,fp8,0,0.022453332940737408
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,1,128,1,float16,float16,0,0.017738666385412216
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,1,128,1,float16,fp8,0,0.01809599995613098
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,1,128,1,fp8,fp8,0,0.01812800019979477
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,2,128,1,float16,float16,0,0.017887999614079792
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,2,128,1,float16,fp8,0,0.01830400029818217
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,2,128,1,fp8,fp8,0,0.018245333184798557
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,4,128,1,float16,float16,0,0.018031999468803406
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,4,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,4,128,1,float16,fp8,0,0.01838933303952217
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,8,128,1,float16,float16,0,0.0180479995906353
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,8,128,1,float16,fp8,0,0.0185759998857975
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,64,8,128,1,fp8,fp8,0,0.01863466699918111
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,1,128,1,float16,float16,0,0.7078239917755127
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,1,128,1,float16,fp8,0,0.7063946723937988
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,1,128,1,fp8,fp8,0,0.7366027037302653
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,2,128,1,float16,float16,0,0.7095839977264404
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,2,128,1,fp8,fp8,0,0.7374613285064697
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,2,128,1,float16,fp8,0,0.7105387051900228
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,4,128,1,float16,float16,0,0.7138826847076416
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,4,128,1,float16,fp8,0,0.7133706410725912
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,1,128,1,float16,float16,0,0.3609866698582967
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,4,128,1,fp8,fp8,0,0.7588213284810384
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,8,128,1,float16,float16,0,0.7220479647318522
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,8,128,1,float16,fp8,0,0.7221600214640299
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,64,8,128,1,fp8,fp8,0,0.7761440277099609
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,64,128,1,float16,float16,0,0.4245973428090413
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,1,128,1,float16,fp8,0,0.36090131600697833
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,64,128,1,float16,fp8,0,0.4105120102564494
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,64,128,1,fp8,fp8,0,0.44862401485443115
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,1,128,1,fp8,fp8,0,0.3765600124994914
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,2,128,1,float16,float16,0,0.36205867926279706
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,2,128,1,float16,fp8,0,0.362773338953654
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,2,128,1,fp8,fp8,0,0.37745598951975506
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,4,128,1,float16,float16,0,0.36441067854563397
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,4,128,1,float16,fp8,0,0.3644533157348633
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,4,128,1,fp8,fp8,0,0.3802666664123535
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,8,128,1,float16,float16,0,0.36928534507751465
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,8,128,1,float16,fp8,0,0.3700853188832601
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,64,8,128,1,fp8,fp8,0,0.3874826828638713
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,1,128,1,float16,float16,0,0.18892266352971396
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,64,128,1,float16,float16,0,0.22367999951044717
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,1,128,1,float16,fp8,0,0.18970133860905966
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,1,128,1,fp8,fp8,0,0.1979466676712036
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,2,128,1,float16,float16,0,0.1893813411394755
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,64,128,1,float16,fp8,0,0.21596799294153848
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,64,128,1,fp8,fp8,0,0.23333867390950522
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,2,128,1,float16,fp8,0,0.1898933251698812
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,2,128,1,fp8,fp8,0,0.19805334011713663
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,4,128,1,float16,fp8,0,0.19093332688013712
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,4,128,1,fp8,fp8,0,0.1991573373476664
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,4,128,1,float16,float16,0,0.19140267372131348
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,8,128,1,float16,float16,0,0.19369065761566162
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,8,128,1,float16,fp8,0,0.19393599033355713
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,64,8,128,1,fp8,fp8,0,0.20316267013549805
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,64,128,1,float16,float16,0,0.12074666221936543
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,1,128,1,float16,float16,0,0.10276266932487488
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,1,128,1,float16,fp8,0,0.10280000170071919
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,64,128,1,float16,fp8,0,0.11713066697120667
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,1,128,1,fp8,fp8,0,0.1088106632232666
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,2,128,1,float16,float16,0,0.10301867127418518
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,64,128,1,fp8,fp8,0,0.12686399618784586
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,2,128,1,float16,fp8,0,0.1029813289642334
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,2,128,1,fp8,fp8,0,0.10967999696731567
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,4,128,1,float16,float16,0,0.10354666908582051
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,4,128,1,float16,fp8,0,0.1037066678206126
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,4,128,1,fp8,fp8,0,0.10958932836850484
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,8,128,1,float16,fp8,0,0.10416000088055928
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,8,128,1,float16,float16,0,0.10457066694895427
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,64,8,128,1,fp8,fp8,0,0.11132267117500305
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,64,128,1,float16,float16,0,0.0682826687892278
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,1,128,1,float16,float16,0,0.05760000149408976
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,64,128,1,float16,fp8,0,0.0666186660528183
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,1,128,1,float16,fp8,0,0.057333335280418396
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,64,128,1,fp8,fp8,0,0.07266666491826375
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,1,128,1,fp8,fp8,0,0.06052800019582113
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,2,128,1,float16,float16,0,0.05712533493836721
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,4,128,1,float16,float16,0,0.05835733314355215
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,2,128,1,float16,fp8,0,0.0580213318268458
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,4,128,1,fp8,fp8,0,0.060271998246510826
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,2,128,1,fp8,fp8,0,0.06044266621271769
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,4,128,1,float16,fp8,0,0.05762666463851929
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,8,128,1,float16,float16,0,0.05852266649405161
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,8,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,1,128,1,float16,float16,0,0.03429866582155228
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,64,128,1,float16,float16,0,0.03555733213822047
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,64,8,128,1,fp8,fp8,0,0.06243200103441874
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,1,128,1,float16,fp8,0,0.03414933383464813
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,1,128,1,fp8,fp8,0,0.03633599976698557
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,64,128,1,float16,fp8,0,0.0355679988861084
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,64,128,1,fp8,fp8,0,0.04161600023508072
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,2,128,1,float16,float16,0,0.03460799902677536
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,2,128,1,float16,fp8,0,0.03454933315515518
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,2,128,1,fp8,fp8,0,0.036506667733192444
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,4,128,1,float16,float16,0,0.0351946676770846
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,4,128,1,float16,fp8,0,0.03504000107447306
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,4,128,1,fp8,fp8,0,0.03827733298142751
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,8,128,1,float16,float16,0,0.03514666606982549
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,8,128,1,float16,fp8,0,0.035349334279696144
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,64,8,128,1,fp8,fp8,0,0.03851199895143509
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,64,128,1,float16,float16,0,0.024256000916163128
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,64,128,1,float16,fp8,0,0.02446399877468745
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,1,128,1,float16,float16,0,0.022848000129063923
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,1,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,64,128,1,fp8,fp8,0,0.026378666361172993
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,1,128,1,fp8,fp8,0,0.0244159996509552
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,2,128,1,float16,float16,0,0.023045333723227184
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,2,128,1,float16,fp8,0,0.023306667804718018
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,2,128,1,fp8,fp8,0,0.024512000381946564
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,4,128,1,float16,float16,0,0.02359466751416524
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,4,128,1,float16,fp8,0,0.02364266663789749
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,4,128,1,fp8,fp8,0,0.02550933261712392
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,8,128,1,float16,float16,0,0.023386667172114056
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,8,128,1,float16,fp8,0,0.023898666103680927
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,64,8,128,1,fp8,fp8,0,0.025765334566434223
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,64,128,1,float16,float16,0,0.0185759998857975
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,64,128,1,float16,fp8,0,0.01851733277241389
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,64,128,1,fp8,fp8,0,0.019658666104078293
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,1,128,1,float16,float16,0,0.01700266698996226
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,1,128,1,float16,fp8,0,0.01762666677435239
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,2,128,1,float16,float16,0,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,1,128,1,fp8,fp8,0,0.01828266680240631
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,2,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,2,128,1,fp8,fp8,0,0.01850133389234543
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,4,128,1,float16,float16,0,0.017690667261679966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,4,128,1,float16,fp8,0,0.018122666825850803
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,8,128,1,float16,float16,0,0.017727999637524288
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,8,128,1,float16,fp8,0,0.017994667092959087
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,8,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,64,4,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,64,128,1,float16,float16,0,0.015802666544914246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,64,128,1,float16,fp8,0,0.016202667107184727
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,1,128,1,float16,float16,0,0.015578666081031164
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,64,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,1,128,1,float16,fp8,0,0.016095999628305435
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,1,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,2,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,2,128,1,float16,float16,0,0.015578666081031164
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,2,128,1,float16,fp8,0,0.01597333326935768
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,4,128,1,float16,float16,0,0.015557333827018738
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,4,128,1,float16,fp8,0,0.016197333733240765
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,4,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,8,128,1,float16,fp8,0,0.01634666696190834
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,8,128,1,float16,float16,0,0.01598400001724561
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,64,8,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,1,128,1,float16,fp8,0,0.49435198307037354
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,1,128,1,fp8,fp8,0,0.5857973496119181
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,1,128,1,float16,float16,0,0.49516268571217853
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,2,128,1,float16,float16,0,0.4964960018793742
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,2,128,1,float16,fp8,0,0.49565335114796955
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,2,128,1,fp8,fp8,0,0.5871466795603434
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,4,128,1,float16,float16,0,0.5009706815083822
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,4,128,1,float16,fp8,0,0.5011359850565592
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,1,128,1,float16,float16,0,0.2544586658477783
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,4,128,1,fp8,fp8,0,0.5915893316268921
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,8,128,1,float16,float16,0,0.507482647895813
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,8,128,1,fp8,fp8,0,0.5975840091705322
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,64,8,128,1,float16,fp8,0,0.5074400107065836
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,64,128,1,float16,float16,0,0.281765341758728
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,64,128,1,fp8,fp8,0,0.3372533321380615
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,64,128,1,float16,fp8,0,0.2755413254102071
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,1,128,1,float16,fp8,0,0.2551893393198649
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,1,128,1,fp8,fp8,0,0.30533866087595624
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,2,128,1,float16,float16,0,0.25571199258168537
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,2,128,1,float16,fp8,0,0.2563893397649129
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,2,128,1,fp8,fp8,0,0.30710933605829877
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,4,128,1,float16,float16,0,0.2574346661567688
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,4,128,1,float16,fp8,0,0.25758934020996094
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,4,128,1,fp8,fp8,0,0.3059413234392802
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,8,128,1,float16,float16,0,0.2610666751861572
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,8,128,1,float16,fp8,0,0.2611200014750163
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,64,8,128,1,fp8,fp8,0,0.30833067496617633
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,64,128,1,float16,float16,0,0.1492639978726705
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,1,128,1,float16,float16,0,0.13542399803797403
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,1,128,1,float16,fp8,0,0.1360053320725759
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,64,128,1,float16,fp8,0,0.14541332920392355
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,1,128,1,fp8,fp8,0,0.16218133767445883
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,64,128,1,fp8,fp8,0,0.17858133713404337
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,2,128,1,float16,float16,0,0.13529066244761148
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,2,128,1,float16,fp8,0,0.13565333684285483
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,2,128,1,fp8,fp8,0,0.16325866182645163
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,4,128,1,float16,float16,0,0.13594667116800943
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,4,128,1,float16,fp8,0,0.13607466220855713
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,4,128,1,fp8,fp8,0,0.1637333333492279
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,8,128,1,float16,fp8,0,0.13774399956067404
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,8,128,1,float16,float16,0,0.13778666655222574
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,64,8,128,1,fp8,fp8,0,0.16591466466585794
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,64,128,1,float16,fp8,0,0.08196799953778584
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,1,128,1,float16,float16,0,0.07415466507275899
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,64,128,1,float16,float16,0,0.08365333080291748
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,64,128,1,fp8,fp8,0,0.09879466891288757
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,1,128,1,float16,fp8,0,0.07387199997901917
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,1,128,1,fp8,fp8,0,0.08763733506202698
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,2,128,1,float16,float16,0,0.073253333568573
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,2,128,1,float16,fp8,0,0.07374399900436401
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,2,128,1,fp8,fp8,0,0.08859200278917949
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,4,128,1,float16,float16,0,0.07420266668001811
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,4,128,1,fp8,fp8,0,0.08938666184743245
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,8,128,1,float16,fp8,0,0.07509333391984303
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,8,128,1,float16,float16,0,0.07503999769687653
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,8,128,1,fp8,fp8,0,0.0899679958820343
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,64,128,1,float16,float16,0,0.043935999274253845
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,64,4,128,1,float16,fp8,0,0.07399466633796692
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,64,128,1,float16,fp8,0,0.043477331598599754
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,1,128,1,float16,float16,0,0.04257066547870636
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,1,128,1,float16,fp8,0,0.04247466723124186
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,1,128,1,fp8,fp8,0,0.05005866785844167
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,2,128,1,float16,float16,0,0.04207466542720795
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,64,128,1,fp8,fp8,0,0.05561600128809611
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,2,128,1,float16,fp8,0,0.04261866708596548
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,4,128,1,float16,float16,0,0.043391997615496315
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,8,128,1,float16,float16,0,0.04306666553020477
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,4,128,1,float16,fp8,0,0.043466667334238686
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,4,128,1,fp8,fp8,0,0.05227733155091604
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,8,128,1,float16,fp8,0,0.042992000778516136
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,8,128,1,fp8,fp8,0,0.052101333936055504
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,64,2,128,1,fp8,fp8,0,0.05030400057633718
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,64,128,1,float16,float16,0,0.02701866626739502
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,2,128,1,float16,float16,0,0.027002667387326557
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,64,128,1,fp8,fp8,0,0.03331200033426285
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,1,128,1,fp8,fp8,0,0.031109333038330078
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,2,128,1,fp8,fp8,0,0.0316746657093366
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,1,128,1,float16,float16,0,0.026704000929991405
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,1,128,1,float16,fp8,0,0.026608000199000042
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,2,128,1,float16,fp8,0,0.026560001075267792
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,4,128,1,float16,float16,0,0.02737066646416982
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,4,128,1,fp8,fp8,0,0.03217600037654241
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,4,128,1,float16,fp8,0,0.027424000203609467
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,8,128,1,float16,float16,0,0.02685333291689555
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,8,128,1,float16,fp8,0,0.027263998985290527
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,64,8,128,1,fp8,fp8,0,0.03221333275238673
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,64,128,1,float16,float16,0,0.01966399947802226
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,1,128,1,float16,float16,0,0.018730666488409042
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,1,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,1,128,1,fp8,fp8,0,0.021962667504946392
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,2,128,1,float16,float16,0,0.018661333868900936
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,2,128,1,float16,fp8,0,0.018805333723624546
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,64,128,1,float16,fp8,0,0.01972266659140587
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,2,128,1,fp8,fp8,0,0.022117334107557934
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,4,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,4,128,1,float16,float16,0,0.019098666807015736
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,4,128,1,fp8,fp8,0,0.023013333479563396
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,8,128,1,float16,fp8,0,0.01950399950146675
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,8,128,1,float16,float16,0,0.019178666174411774
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,64,8,128,1,fp8,fp8,0,0.022448000808556873
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,64,128,1,float16,fp8,0,0.01607999950647354
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,64,128,1,fp8,fp8,0,0.018218666315078735
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,1,128,1,float16,float16,0,0.015397333850463232
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,1,128,1,float16,fp8,0,0.015637333194414776
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,64,128,1,float16,float16,0,0.015941333025693893
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,1,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,2,128,1,float16,float16,0,0.01509333277742068
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,4,128,1,float16,float16,0,0.015477333217859268
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,2,128,1,fp8,fp8,0,0.017445333302021027
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,2,128,1,float16,fp8,0,0.015642666568358738
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,4,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,4,128,1,fp8,fp8,0,0.018181333939234417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,8,128,1,float16,fp8,0,0.015802666544914246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,64,128,1,float16,float16,0,0.01471466695268949
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,8,128,1,fp8,fp8,0,0.01754133279124896
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,1,128,1,float16,float16,0,0.014362666755914688
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,64,8,128,1,float16,float16,0,0.015493333339691162
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,1,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,1,128,1,fp8,fp8,0,0.016623999923467636
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,2,128,1,float16,float16,0,0.014469332993030548
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,2,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,2,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,4,128,1,float16,float16,0,0.01431999976436297
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,4,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,8,128,1,float16,float16,0,0.014560000350077948
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,8,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,8,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,64,4,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,1,128,1,float16,float16,0,0.4248480002085368
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,1,128,1,float16,fp8,0,0.42590399583180744
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,1,128,1,fp8,fp8,0,0.5091893275578817
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,2,128,1,fp8,fp8,0,0.5109653472900391
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,2,128,1,float16,float16,0,0.42561066150665283
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,2,128,1,float16,fp8,0,0.42721601327260333
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,4,128,1,float16,float16,0,0.427562673886617
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,4,128,1,float16,fp8,0,0.4267413218816121
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,4,128,1,fp8,fp8,0,0.5113120079040527
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,8,128,1,float16,float16,0,0.42977599302927655
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,8,128,1,float16,fp8,0,0.4294026692708333
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,64,8,128,1,fp8,fp8,0,0.5157920122146606
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,64,128,1,float16,float16,0,0.21909334262212118
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,64,128,1,fp8,fp8,0,0.28037333488464355
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,64,128,1,float16,fp8,0,0.21527999639511108
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,1,128,1,float16,fp8,0,0.22018667062123617
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,2,128,1,float16,float16,0,0.2206559975941976
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,1,128,1,fp8,fp8,0,0.2635680039723714
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,1,128,1,float16,float16,0,0.22019733985265097
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,2,128,1,float16,fp8,0,0.21963733434677124
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,2,128,1,fp8,fp8,0,0.2646239995956421
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,4,128,1,float16,float16,0,0.2203893264134725
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,4,128,1,float16,fp8,0,0.22146666049957275
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,4,128,1,fp8,fp8,0,0.2654026746749878
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,8,128,1,float16,fp8,0,0.22180799643198648
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,8,128,1,float16,float16,0,0.22104533513387045
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,64,128,1,float16,float16,0,0.11600533127784729
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,1,128,1,float16,float16,0,0.11698133746782939
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,64,8,128,1,fp8,fp8,0,0.2667413353919983
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,64,128,1,float16,fp8,0,0.11470400293668111
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,64,128,1,fp8,fp8,0,0.14948800206184387
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,1,128,1,fp8,fp8,0,0.138565331697464
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,1,128,1,float16,fp8,0,0.11547733346621196
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,2,128,1,float16,float16,0,0.11585600177447002
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,2,128,1,float16,fp8,0,0.11730666955312093
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,2,128,1,fp8,fp8,0,0.1393013298511505
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,4,128,1,float16,float16,0,0.1167039970556895
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,4,128,1,float16,fp8,0,0.11646399895350139
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,4,128,1,fp8,fp8,0,0.1406613290309906
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,8,128,1,float16,float16,0,0.11658666531244914
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,8,128,1,fp8,fp8,0,0.14174933234850565
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,64,8,128,1,float16,fp8,0,0.11647466818491618
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,64,128,1,float16,float16,0,0.06105599800745646
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,1,128,1,float16,float16,0,0.0645546664794286
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,1,128,1,float16,fp8,0,0.06474133332570393
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,2,128,1,float16,float16,0,0.0650133341550827
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,64,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,1,128,1,fp8,fp8,0,0.07819733520348866
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,2,128,1,float16,fp8,0,0.06463466584682465
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,2,128,1,fp8,fp8,0,0.07851733267307281
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,64,128,1,fp8,fp8,0,0.08169066905975342
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,4,128,1,float16,float16,0,0.06543999910354614
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,4,128,1,float16,fp8,0,0.06545066833496094
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,4,128,1,fp8,fp8,0,0.07969599962234497
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,8,128,1,float16,fp8,0,0.06586133440335591
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,8,128,1,float16,float16,0,0.06585066517194112
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,64,8,128,1,fp8,fp8,0,0.07986133297284444
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,64,128,1,float16,float16,0,0.03624533365170161
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,1,128,1,float16,float16,0,0.03807466725508372
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,1,128,1,float16,fp8,0,0.03813866774241129
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,64,128,1,fp8,fp8,0,0.04749333361784617
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,1,128,1,fp8,fp8,0,0.04574400186538696
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,2,128,1,float16,fp8,0,0.03827200084924698
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,2,128,1,float16,float16,0,0.03843733419974645
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,64,128,1,float16,fp8,0,0.036677333215872444
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,2,128,1,fp8,fp8,0,0.045663997530937195
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,4,128,1,float16,float16,0,0.038719999293486275
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,4,128,1,float16,fp8,0,0.03868799904982249
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,4,128,1,fp8,fp8,0,0.04619200030962626
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,8,128,1,float16,fp8,0,0.0386559988061587
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,8,128,1,float16,float16,0,0.03839999934037527
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,64,8,128,1,fp8,fp8,0,0.04653333127498627
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,64,128,1,float16,fp8,0,0.02478933334350586
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,64,128,1,float16,float16,0,0.02459733436505
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,1,128,1,float16,fp8,0,0.024298667907714844
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,64,128,1,fp8,fp8,0,0.03073066721359889
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,1,128,1,float16,float16,0,0.02455466737349828
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,2,128,1,float16,float16,0,0.024373332659403484
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,2,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,1,128,1,fp8,fp8,0,0.02922133356332779
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,2,128,1,float16,fp8,0,0.024549332757790882
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,4,128,1,float16,fp8,0,0.02460266649723053
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,4,128,1,fp8,fp8,0,0.03009066730737686
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,8,128,1,float16,float16,0,0.02473066747188568
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,8,128,1,float16,fp8,0,0.02462399999300639
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,4,128,1,float16,float16,0,0.02442666639884313
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,64,128,1,float16,float16,0,0.018031999468803406
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,64,128,1,float16,fp8,0,0.018298666924238205
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,64,8,128,1,fp8,fp8,0,0.029989334444204967
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,64,128,1,fp8,fp8,0,0.021477334201335907
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,1,128,1,float16,fp8,0,0.018357332795858383
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,1,128,1,float16,float16,0,0.017909333109855652
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,1,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,2,128,1,float16,float16,0,0.017840000490347546
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,2,128,1,fp8,fp8,0,0.02109333376089732
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,2,128,1,float16,fp8,0,0.018165333817402523
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,4,128,1,float16,float16,0,0.017935999979575474
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,4,128,1,float16,fp8,0,0.018191999445358913
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,4,128,1,fp8,fp8,0,0.021551998953024547
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,8,128,1,float16,float16,0,0.018090666582187016
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,8,128,1,float16,fp8,0,0.0183146670460701
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,64,128,1,float16,fp8,0,0.015301333119471868
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,64,8,128,1,fp8,fp8,0,0.021759999295075733
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,64,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,1,128,1,float16,float16,0,0.014730667074521383
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,1,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,1,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,2,128,1,float16,float16,0,0.015114666273196539
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,2,128,1,float16,fp8,0,0.015754666179418564
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,2,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,4,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,4,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,8,128,1,float16,float16,0,0.01492799942692121
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,8,128,1,float16,fp8,0,0.01545599972208341
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,8,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,64,4,128,1,float16,float16,0,0.015173333386580149
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,64,128,1,float16,float16,0,0.014245333770910898
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,64,128,1,float16,fp8,0,0.014618666221698126
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,64,128,1,fp8,fp8,0,0.01637866720557213
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,1,128,1,float16,fp8,0,0.014778666198253632
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,1,128,1,float16,float16,0,0.014064000298579534
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,1,128,1,fp8,fp8,0,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,2,128,1,float16,float16,0,0.013994666437307993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,2,128,1,fp8,fp8,0,0.016538667182127636
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,4,128,1,float16,float16,0,0.014202666779359182
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,2,128,1,float16,fp8,0,0.014639999717473984
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,4,128,1,float16,fp8,0,0.014677333335081736
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,4,128,1,fp8,fp8,0,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,8,128,1,float16,float16,0,0.014085333794355392
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,8,128,1,float16,fp8,0,0.014533333480358124
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,64,8,128,1,fp8,fp8,0,0.016447999825080235
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,1,128,1,float16,float16,0,49.66834513346354
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,1,128,1,float16,fp8,0,48.91515096028646
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,2,128,1,float16,float16,0,49.19395955403646
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,2,128,1,float16,fp8,0,49.28018697102865
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,2,128,1,fp8,fp8,0,32.384256998697914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,4,128,1,float16,float16,0,49.56791687011719
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,4,128,1,float16,fp8,0,48.389994303385414
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,1,128,1,fp8,fp8,0,32.09994761149088
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,1,128,1,float16,float16,0,24.750574747721355
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,4,128,1,fp8,fp8,0,32.39458719889323
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,8,128,1,float16,float16,0,49.19695536295573
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,8,128,1,fp8,fp8,0,32.10500844319662
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,48,8,128,1,float16,fp8,0,49.225311279296875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,1,128,1,float16,fp8,0,24.719039916992188
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,1,128,1,fp8,fp8,0,16.351685841878254
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,48,128,1,float16,fp8,0,24.517985026041668
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,48,128,1,fp8,fp8,0,16.041898091634113
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,48,128,1,float16,float16,0,24.797386169433594
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,2,128,1,float16,float16,0,24.686314900716145
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,2,128,1,float16,fp8,0,24.771087646484375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,2,128,1,fp8,fp8,0,16.086043039957683
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,4,128,1,float16,float16,0,24.78620147705078
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,4,128,1,float16,fp8,0,24.85358428955078
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,4,128,1,fp8,fp8,0,16.346725463867188
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,8,128,1,float16,float16,0,24.53630828857422
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,1,128,1,float16,float16,0,12.712224324544271
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,8,128,1,fp8,fp8,0,16.269994099934895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,1,128,1,float16,fp8,0,12.616928100585938
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,48,8,128,1,float16,fp8,0,25.267354329427082
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,1,128,1,fp8,fp8,0,8.080479939778646
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,2,128,1,float16,float16,0,12.685749053955078
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,48,128,1,float16,float16,0,12.360037485758463
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,2,128,1,float16,fp8,0,12.503482818603516
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,2,128,1,fp8,fp8,0,8.11518923441569
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,48,128,1,fp8,fp8,0,8.123573303222656
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,48,128,1,float16,fp8,0,12.504266103108725
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,4,128,1,float16,float16,0,12.771589914957682
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,4,128,1,float16,fp8,0,12.403663635253906
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,4,128,1,fp8,fp8,0,8.223765055338541
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,1,128,1,float16,float16,0,6.11685307820638
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,8,128,1,fp8,fp8,0,8.361514409383139
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,8,128,1,float16,float16,0,12.771402994791666
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,48,8,128,1,float16,fp8,0,12.505696614583334
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,48,128,1,float16,float16,0,6.021018981933594
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,48,128,1,float16,fp8,0,6.256341298421224
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,1,128,1,float16,fp8,0,6.495536168416341
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,48,128,1,fp8,fp8,0,4.26040522257487
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,1,128,1,fp8,fp8,0,4.139397303263347
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,2,128,1,float16,float16,0,6.2498931884765625
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,2,128,1,fp8,fp8,0,4.136938730875651
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,2,128,1,float16,fp8,0,6.1757386525472
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,4,128,1,float16,float16,0,6.370613098144531
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,4,128,1,fp8,fp8,0,4.163034756978353
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,4,128,1,float16,fp8,0,6.148725509643555
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,8,128,1,float16,float16,0,6.566272099812825
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,8,128,1,float16,fp8,0,6.21237309773763
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,48,8,128,1,fp8,fp8,0,4.109290758768718
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,1,128,1,float16,float16,0,28.51610056559245
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,1,128,1,float16,fp8,0,28.501686096191406
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,1,128,1,fp8,fp8,0,18.817189534505207
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,2,128,1,float16,float16,0,28.7640380859375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,2,128,1,fp8,fp8,0,19.004229227701824
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,2,128,1,float16,fp8,0,28.507130940755207
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,4,128,1,float16,fp8,0,28.594512939453125
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,4,128,1,float16,float16,0,28.108500162760418
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,1,128,1,float16,float16,0,14.1867307027181
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,4,128,1,fp8,fp8,0,18.654176076253254
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,8,128,1,float16,fp8,0,28.54222361246745
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,8,128,1,fp8,fp8,0,18.95962651570638
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,48,8,128,1,float16,float16,0,28.764127095540363
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,1,128,1,float16,fp8,0,14.127503712972006
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,1,128,1,fp8,fp8,0,9.443157196044922
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,48,128,1,float16,fp8,0,14.332992553710938
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,48,128,1,float16,float16,0,14.382586161295572
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,48,128,1,fp8,fp8,0,9.624287923177084
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,2,128,1,float16,fp8,0,14.416762034098307
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,2,128,1,float16,float16,0,14.602090199788412
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,2,128,1,fp8,fp8,0,9.408389409383139
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,4,128,1,float16,float16,0,14.399717966715494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,4,128,1,float16,fp8,0,14.36294428507487
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,4,128,1,fp8,fp8,0,9.438240051269531
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,8,128,1,float16,float16,0,14.549339294433594
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,8,128,1,float16,fp8,0,14.433781941731771
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,1,128,1,float16,float16,0,7.143295923868815
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,1,128,1,float16,fp8,0,6.977573394775391
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,48,8,128,1,fp8,fp8,0,9.539626439412435
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,1,128,1,fp8,fp8,0,4.840319951375325
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,2,128,1,float16,float16,0,7.413514455159505
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,48,128,1,float16,float16,0,7.225429534912109
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,48,128,1,float16,fp8,0,6.97221310933431
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,48,128,1,fp8,fp8,0,4.727519989013672
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,2,128,1,float16,fp8,0,7.225061416625977
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,2,128,1,fp8,fp8,0,4.757765452067058
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,4,128,1,float16,float16,0,7.157562891642253
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,4,128,1,fp8,fp8,0,4.721141179402669
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,4,128,1,float16,fp8,0,6.959930419921875
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,8,128,1,float16,float16,0,7.160736083984375
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,8,128,1,float16,fp8,0,7.082304000854492
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,48,8,128,1,fp8,fp8,0,4.674288113911946
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,48,128,1,float16,float16,0,3.580047925313314
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,1,128,1,float16,float16,0,3.6325225830078125
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,1,128,1,float16,fp8,0,3.543050765991211
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,48,128,1,float16,fp8,0,3.539525349934896
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,2,128,1,float16,float16,0,3.620885213216146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,1,128,1,fp8,fp8,0,2.4730025927225747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,48,128,1,fp8,fp8,0,2.4819253285725913
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,2,128,1,float16,fp8,0,3.611680030822754
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,2,128,1,fp8,fp8,0,2.4590986569722495
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,4,128,1,float16,float16,0,3.6303787231445312
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,4,128,1,float16,fp8,0,3.546762784322103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,4,128,1,fp8,fp8,0,2.4719413121541343
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,8,128,1,float16,float16,0,3.6389118830362954
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,8,128,1,fp8,fp8,0,2.4851892789204917
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,48,8,128,1,float16,fp8,0,3.6607786814371743
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,1,128,1,float16,fp8,0,20.06054941813151
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,1,128,1,fp8,fp8,0,13.567274729410807
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,1,128,1,float16,float16,0,20.152603149414062
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,2,128,1,float16,float16,0,20.601807912190754
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,2,128,1,float16,fp8,0,20.12226104736328
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,2,128,1,fp8,fp8,0,13.349466959635416
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,4,128,1,float16,float16,0,20.27674102783203
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,4,128,1,float16,fp8,0,20.132160186767578
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,1,128,1,float16,float16,0,10.177178700764975
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,4,128,1,fp8,fp8,0,13.535439809163412
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,8,128,1,float16,float16,0,20.369232177734375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,8,128,1,float16,fp8,0,20.044357299804688
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,48,8,128,1,fp8,fp8,0,13.780202229817709
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,48,128,1,float16,fp8,0,10.196186701456705
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,48,128,1,float16,float16,0,10.30087979634603
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,1,128,1,fp8,fp8,0,6.741717020670573
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,48,128,1,fp8,fp8,0,6.899450937906901
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,1,128,1,float16,fp8,0,10.0479736328125
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,2,128,1,float16,float16,0,10.523504257202148
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,2,128,1,float16,fp8,0,10.25104014078776
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,2,128,1,fp8,fp8,0,6.658965428670247
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,4,128,1,float16,float16,0,10.154832204182943
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,4,128,1,float16,fp8,0,10.152234395345053
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,4,128,1,fp8,fp8,0,6.668602625528972
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,8,128,1,float16,float16,0,10.340933481852213
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,8,128,1,float16,fp8,0,10.1658935546875
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,1,128,1,float16,float16,0,5.115845362345378
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,1,128,1,float16,fp8,0,4.80782413482666
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,48,8,128,1,fp8,fp8,0,6.803802490234375
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,48,128,1,float16,float16,0,5.016112009684245
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,1,128,1,fp8,fp8,0,3.392613410949707
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,48,128,1,float16,fp8,0,4.8391679128011065
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,2,128,1,float16,float16,0,4.920778592427571
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,2,128,1,float16,fp8,0,5.080746650695801
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,2,128,1,fp8,fp8,0,3.325637181599935
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,48,128,1,fp8,fp8,0,3.3977438608805337
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,4,128,1,float16,float16,0,5.083242734273274
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,4,128,1,float16,fp8,0,5.091530799865723
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,4,128,1,fp8,fp8,0,3.4138612747192383
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,8,128,1,float16,float16,0,5.042448043823242
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,1,128,1,float16,float16,0,2.575658639272054
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,8,128,1,fp8,fp8,0,3.3771146138509116
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,1,128,1,float16,fp8,0,2.5689120292663574
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,48,8,128,1,float16,fp8,0,5.108218510945638
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,48,128,1,float16,float16,0,2.5370613733927407
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,48,128,1,float16,fp8,0,2.5146506627400718
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,48,128,1,fp8,fp8,0,1.8255893389383953
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,1,128,1,fp8,fp8,0,1.8052266438802083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,2,128,1,float16,float16,0,2.576570669809977
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,2,128,1,fp8,fp8,0,1.8136906623840332
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,2,128,1,float16,fp8,0,2.556586742401123
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,4,128,1,float16,fp8,0,2.5621280670166016
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,4,128,1,float16,float16,0,2.6078880627950034
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,4,128,1,fp8,fp8,0,1.8087414105733235
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,8,128,1,float16,float16,0,2.601125399271647
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,8,128,1,float16,fp8,0,2.5624799728393555
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,48,8,128,1,fp8,fp8,0,1.8127306302388508
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,1,128,1,float16,float16,0,26.928916931152344
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,1,128,1,fp8,fp8,0,17.66220219930013
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,1,128,1,float16,fp8,0,26.506624857584637
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,2,128,1,fp8,fp8,0,17.935887654622395
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,2,128,1,float16,fp8,0,26.163856506347656
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,2,128,1,float16,float16,0,26.788767496744793
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,4,128,1,float16,fp8,0,26.77123769124349
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,4,128,1,float16,float16,0,27.25994110107422
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,1,128,1,float16,float16,0,13.571664174397787
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,4,128,1,fp8,fp8,0,18.060651143391926
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,8,128,1,float16,float16,0,26.929285685221355
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,8,128,1,float16,fp8,0,26.578272501627605
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,1,128,1,float16,fp8,0,13.579952239990234
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,48,8,128,1,fp8,fp8,0,18.04942448933919
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,48,128,1,float16,float16,0,13.337061564127604
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,48,128,1,float16,fp8,0,13.501637776692709
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,48,128,1,fp8,fp8,0,9.196282704671225
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,1,128,1,fp8,fp8,0,9.11674690246582
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,2,128,1,float16,float16,0,13.395434061686197
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,2,128,1,fp8,fp8,0,9.076159795125326
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,2,128,1,float16,fp8,0,13.346672058105469
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,4,128,1,float16,float16,0,13.492794036865234
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,4,128,1,float16,fp8,0,13.178485870361328
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,4,128,1,fp8,fp8,0,9.052480061848959
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,8,128,1,float16,float16,0,13.452128092447916
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,8,128,1,float16,fp8,0,13.53005345662435
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,1,128,1,float16,float16,0,6.62222417195638
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,1,128,1,float16,fp8,0,6.703088124593099
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,48,8,128,1,fp8,fp8,0,9.084421157836914
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,1,128,1,fp8,fp8,0,4.3735198974609375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,2,128,1,float16,float16,0,6.807568232218425
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,48,128,1,float16,fp8,0,6.840986887613933
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,2,128,1,float16,fp8,0,6.635589599609375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,48,128,1,fp8,fp8,0,4.5471146901448565
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,48,128,1,float16,float16,0,6.8037064870198565
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,2,128,1,fp8,fp8,0,4.4355519612630205
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,4,128,1,float16,float16,0,6.6273759206136065
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,4,128,1,float16,fp8,0,6.797445297241211
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,4,128,1,fp8,fp8,0,4.363456090291341
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,8,128,1,fp8,fp8,0,4.383285204569499
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,8,128,1,float16,fp8,0,6.55734380086263
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,48,8,128,1,float16,float16,0,6.535439809163411
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,48,128,1,float16,float16,0,3.2455199559529624
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,1,128,1,float16,float16,0,3.3039894104003906
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,1,128,1,float16,fp8,0,3.242426554361979
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,1,128,1,fp8,fp8,0,2.2749387423197427
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,2,128,1,float16,float16,0,3.2348159154256186
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,48,128,1,float16,fp8,0,3.2706238428751626
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,2,128,1,float16,fp8,0,3.24399471282959
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,2,128,1,fp8,fp8,0,2.2744693756103516
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,48,128,1,fp8,fp8,0,2.315648078918457
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,4,128,1,float16,float16,0,3.3622827529907227
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,4,128,1,float16,fp8,0,3.2484000523885093
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,4,128,1,fp8,fp8,0,2.283023993174235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,8,128,1,float16,float16,0,3.2439893086751304
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,8,128,1,float16,fp8,0,3.2703307469685874
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,1,128,1,float16,float16,0,1.7597546577453613
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,48,8,128,1,fp8,fp8,0,2.286959966023763
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,48,128,1,float16,float16,0,1.7336586316426594
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,48,128,1,float16,fp8,0,1.712831974029541
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,1,128,1,float16,fp8,0,1.7349866231282551
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,1,128,1,fp8,fp8,0,1.2476320266723633
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,48,128,1,fp8,fp8,0,1.2640213171641033
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,2,128,1,float16,fp8,0,1.7382826805114746
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,2,128,1,fp8,fp8,0,1.2547893524169922
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,2,128,1,float16,float16,0,1.764586607615153
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,4,128,1,float16,float16,0,1.7535413106282551
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,4,128,1,float16,fp8,0,1.725327968597412
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,4,128,1,fp8,fp8,0,1.2546772956848145
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,8,128,1,float16,fp8,0,1.7293386459350586
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,8,128,1,float16,float16,0,1.7435733477274578
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,48,8,128,1,fp8,fp8,0,1.2632053693135579
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,1,128,1,float16,float16,0,15.715498606363932
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,1,128,1,fp8,fp8,0,11.120618184407553
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,1,128,1,float16,fp8,0,15.750432332356771
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,2,128,1,fp8,fp8,0,10.708340962727865
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,2,128,1,float16,fp8,0,15.76959482828776
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,2,128,1,float16,float16,0,15.728052775065104
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,4,128,1,float16,float16,0,15.625082651774088
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,4,128,1,float16,fp8,0,15.87667719523112
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,1,128,1,float16,float16,0,7.7958189646403
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,4,128,1,fp8,fp8,0,10.778799692789713
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,1,128,1,float16,fp8,0,7.884080251057942
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,8,128,1,float16,float16,0,16.19152577718099
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,8,128,1,fp8,fp8,0,10.883380889892578
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,48,8,128,1,float16,fp8,0,15.688949584960938
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,48,128,1,float16,fp8,0,7.864410400390625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,48,128,1,float16,float16,0,7.880032221476237
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,48,128,1,fp8,fp8,0,5.533786773681641
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,1,128,1,fp8,fp8,0,5.251722653706868
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,2,128,1,float16,float16,0,7.913306554158528
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,2,128,1,float16,fp8,0,8.00052261352539
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,2,128,1,fp8,fp8,0,5.358256022135417
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,4,128,1,float16,float16,0,7.9464162190755205
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,4,128,1,float16,fp8,0,7.742346445719401
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,4,128,1,fp8,fp8,0,5.266405423482259
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,8,128,1,float16,float16,0,7.704111735026042
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,8,128,1,float16,fp8,0,7.774885177612305
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,1,128,1,float16,float16,0,3.7462666829427085
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,1,128,1,float16,fp8,0,3.7715733846028647
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,1,128,1,fp8,fp8,0,2.6397600173950195
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,48,8,128,1,fp8,fp8,0,5.31276257832845
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,48,128,1,float16,float16,0,3.8138453165690103
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,2,128,1,float16,float16,0,3.8765172958374023
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,48,128,1,float16,fp8,0,3.9317280451456704
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,2,128,1,float16,fp8,0,3.8001438776652017
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,2,128,1,fp8,fp8,0,2.6669066747029624
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,4,128,1,float16,float16,0,3.817808151245117
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,48,128,1,fp8,fp8,0,2.809141476949056
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,4,128,1,float16,fp8,0,3.873178799947103
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,4,128,1,fp8,fp8,0,2.6591946283976235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,1,128,1,float16,float16,0,1.9812533060709636
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,8,128,1,float16,float16,0,3.8831733067830405
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,8,128,1,float16,fp8,0,3.8229173024495444
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,48,8,128,1,fp8,fp8,0,2.674864133199056
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,48,128,1,float16,float16,0,1.9865546226501465
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,1,128,1,float16,fp8,0,1.941450595855713
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,1,128,1,fp8,fp8,0,1.4157546361287434
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,2,128,1,float16,float16,0,1.9743040402730305
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,48,128,1,float16,fp8,0,1.9388532638549805
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,2,128,1,float16,fp8,0,1.9371412595113118
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,2,128,1,fp8,fp8,0,1.4131627082824707
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,48,128,1,fp8,fp8,0,1.4679946899414062
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,4,128,1,float16,float16,0,1.9834933280944824
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,4,128,1,fp8,fp8,0,1.4167466163635254
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,4,128,1,float16,fp8,0,1.9463413556416829
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,8,128,1,float16,fp8,0,1.9384640057881672
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,8,128,1,float16,float16,0,1.9801759719848633
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,48,8,128,1,fp8,fp8,0,1.4221386909484863
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,48,128,1,float16,float16,0,1.0697279771169026
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,48,128,1,float16,fp8,0,1.0705653031667073
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,1,128,1,float16,float16,0,1.0843946933746338
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,1,128,1,float16,fp8,0,1.059712012608846
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,48,128,1,fp8,fp8,0,0.812122662862142
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,1,128,1,fp8,fp8,0,0.7926826477050781
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,2,128,1,float16,float16,0,1.0819786389668782
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,2,128,1,float16,fp8,0,1.061306635538737
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,2,128,1,fp8,fp8,0,0.793824036916097
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,4,128,1,fp8,fp8,0,0.7981279691060384
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,4,128,1,float16,fp8,0,1.0686079661051433
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,4,128,1,float16,float16,0,1.086085319519043
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,8,128,1,float16,float16,0,1.0868480205535889
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,8,128,1,float16,fp8,0,1.0606133143107097
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,48,8,128,1,fp8,fp8,0,0.8018720149993896
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,1,128,1,float16,float16,0,15.375130971272787
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,1,128,1,float16,fp8,0,15.141157786051432
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,1,128,1,fp8,fp8,0,10.868186950683594
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,2,128,1,float16,float16,0,15.5349489847819
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,2,128,1,float16,fp8,0,15.193227132161459
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,2,128,1,fp8,fp8,0,10.734090169270834
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,4,128,1,float16,float16,0,15.174602508544922
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,4,128,1,float16,fp8,0,15.136138916015625
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,1,128,1,float16,float16,0,7.571877161661784
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,4,128,1,fp8,fp8,0,10.749829610188803
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,1,128,1,float16,fp8,0,7.4786027272542315
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,8,128,1,fp8,fp8,0,10.988778432210287
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,8,128,1,float16,float16,0,15.389450073242188
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,48,8,128,1,float16,fp8,0,15.410325368245443
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,48,128,1,float16,float16,0,7.760351816813151
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,48,128,1,float16,fp8,0,7.71780268351237
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,48,128,1,fp8,fp8,0,5.521141052246094
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,1,128,1,fp8,fp8,0,5.183087984720866
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,2,128,1,float16,fp8,0,7.677050908406575
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,2,128,1,float16,float16,0,7.663834889729817
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,2,128,1,fp8,fp8,0,5.256271998087565
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,4,128,1,float16,float16,0,7.73093859354655
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,4,128,1,fp8,fp8,0,5.252837181091309
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,4,128,1,float16,fp8,0,7.621957143147786
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,8,128,1,float16,float16,0,7.606314977010091
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,8,128,1,float16,fp8,0,7.648576100667317
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,1,128,1,float16,float16,0,3.650304158528646
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,1,128,1,float16,fp8,0,3.687162717183431
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,48,8,128,1,fp8,fp8,0,5.4661439259847
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,1,128,1,fp8,fp8,0,2.62444273630778
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,48,128,1,float16,fp8,0,3.6752373377482095
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,48,128,1,float16,float16,0,3.7085065841674805
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,2,128,1,float16,float16,0,3.625957489013672
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,48,128,1,fp8,fp8,0,2.8127574920654297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,2,128,1,float16,fp8,0,3.5954453150431314
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,2,128,1,fp8,fp8,0,2.6418347358703613
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,4,128,1,float16,float16,0,3.7873706817626953
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,4,128,1,float16,fp8,0,3.5566345850626626
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,4,128,1,fp8,fp8,0,2.644378662109375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,8,128,1,float16,float16,0,3.675013224283854
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,8,128,1,float16,fp8,0,3.6807146072387695
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,1,128,1,float16,float16,0,1.867029349009196
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,1,128,1,float16,fp8,0,1.8396587371826172
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,48,8,128,1,fp8,fp8,0,2.668575922648112
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,48,128,1,float16,float16,0,1.922800064086914
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,1,128,1,fp8,fp8,0,1.3738239606221516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,2,128,1,float16,float16,0,1.8717066446940105
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,48,128,1,float16,fp8,0,1.9125919342041016
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,2,128,1,float16,fp8,0,1.8362986246744792
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,48,128,1,fp8,fp8,0,1.4780799547831218
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,2,128,1,fp8,fp8,0,1.3768426577250164
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,4,128,1,float16,float16,0,1.8801226615905762
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,4,128,1,fp8,fp8,0,1.384485403696696
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,4,128,1,float16,fp8,0,1.8598346710205078
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,8,128,1,float16,float16,0,1.8820427258809407
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,8,128,1,float16,fp8,0,1.8747199376424153
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,48,8,128,1,fp8,fp8,0,1.3933119773864746
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,1,128,1,float16,fp8,0,0.9850133260091146
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,1,128,1,float16,float16,0,1.004325310389201
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,48,128,1,float16,float16,0,1.0179200172424316
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,48,128,1,float16,fp8,0,1.0008320013682048
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,1,128,1,fp8,fp8,0,0.7482720216115316
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,48,128,1,fp8,fp8,0,0.7859679857889811
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,2,128,1,float16,fp8,0,0.9824533462524414
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,2,128,1,float16,float16,0,1.008570671081543
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,2,128,1,fp8,fp8,0,0.7492266496022543
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,4,128,1,float16,float16,0,1.005343993504842
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,4,128,1,float16,fp8,0,0.9903413454691569
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,4,128,1,fp8,fp8,0,0.7520639896392822
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,8,128,1,float16,float16,0,1.00654935836792
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,8,128,1,float16,fp8,0,0.9891946315765381
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,1,128,1,float16,float16,0,0.5710879961649576
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,48,8,128,1,fp8,fp8,0,0.756773312886556
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,1,128,1,float16,fp8,0,0.5555253426233927
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,48,128,1,float16,float16,0,0.5695893367131551
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,48,128,1,float16,fp8,0,0.5630133152008057
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,1,128,1,fp8,fp8,0,0.415887991587321
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,48,128,1,fp8,fp8,0,0.44516801834106445
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,2,128,1,float16,float16,0,0.5671999851862589
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,2,128,1,float16,fp8,0,0.5591413180033366
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,2,128,1,fp8,fp8,0,0.41672531763712567
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,4,128,1,float16,float16,0,0.5691466728846232
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,4,128,1,fp8,fp8,0,0.4196853240331014
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,8,128,1,float16,float16,0,0.5711199839909872
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,4,128,1,float16,fp8,0,0.5597013235092163
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,8,128,1,float16,fp8,0,0.562112013498942
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,48,8,128,1,fp8,fp8,0,0.4203679958979289
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,1,128,1,float16,float16,0,9.385056177775065
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,1,128,1,fp8,fp8,0,6.623727798461914
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,1,128,1,float16,fp8,0,9.101696014404297
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,2,128,1,float16,float16,0,9.306602478027344
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,2,128,1,float16,fp8,0,9.239354451497396
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,2,128,1,fp8,fp8,0,6.729333241780599
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,4,128,1,float16,float16,0,9.374122619628906
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,4,128,1,float16,fp8,0,9.387866973876953
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,1,128,1,float16,float16,0,4.613114674886067
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,4,128,1,fp8,fp8,0,6.714799880981445
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,8,128,1,float16,float16,0,9.426858901977539
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,8,128,1,fp8,fp8,0,6.697397232055664
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,48,8,128,1,float16,fp8,0,9.195797602335611
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,1,128,1,float16,fp8,0,4.474992116292317
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,48,128,1,float16,float16,0,4.757712046305339
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,48,128,1,float16,fp8,0,4.6208906173706055
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,1,128,1,fp8,fp8,0,3.2887627283732095
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,48,128,1,fp8,fp8,0,3.578650792439779
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,2,128,1,float16,float16,0,4.520677248636882
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,2,128,1,float16,fp8,0,4.359653472900391
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,2,128,1,fp8,fp8,0,3.3073012034098306
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,4,128,1,float16,float16,0,4.5382080078125
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,4,128,1,float16,fp8,0,4.3342132568359375
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,4,128,1,fp8,fp8,0,3.31984011332194
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,8,128,1,float16,fp8,0,4.540181477864583
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,8,128,1,float16,float16,0,4.598954518636067
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,1,128,1,float16,float16,0,2.241487979888916
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,1,128,1,float16,fp8,0,2.218506654103597
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,48,8,128,1,fp8,fp8,0,3.3445971806844077
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,48,128,1,float16,float16,0,2.3379359245300293
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,1,128,1,fp8,fp8,0,1.6775892575581868
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,2,128,1,float16,float16,0,2.2638986905415854
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,48,128,1,float16,fp8,0,2.2807092666625977
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,48,128,1,fp8,fp8,0,1.8264053662618
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,2,128,1,float16,fp8,0,2.2006613413492837
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,2,128,1,fp8,fp8,0,1.6855467160542805
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,4,128,1,float16,float16,0,2.2483946482340493
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,4,128,1,float16,fp8,0,2.2228852907816568
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,4,128,1,fp8,fp8,0,1.6945707003275554
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,8,128,1,float16,float16,0,2.2798825899759927
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,1,128,1,float16,float16,0,1.1746506690979004
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,8,128,1,float16,fp8,0,2.2127040227254233
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,48,8,128,1,fp8,fp8,0,1.704309304555257
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,1,128,1,float16,fp8,0,1.1511253515879314
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,1,128,1,fp8,fp8,0,0.8917493025461832
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,48,128,1,float16,float16,0,1.2005386352539062
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,2,128,1,float16,float16,0,1.1777386665344238
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,2,128,1,float16,fp8,0,1.1494560241699219
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,48,128,1,fp8,fp8,0,0.965119997660319
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,2,128,1,fp8,fp8,0,0.8873493671417236
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,48,128,1,float16,fp8,0,1.2038506666819255
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,4,128,1,float16,float16,0,1.1766346295674641
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,4,128,1,float16,fp8,0,1.1603786945343018
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,4,128,1,fp8,fp8,0,0.8951786359151205
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,8,128,1,float16,float16,0,1.184997320175171
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,1,128,1,float16,float16,0,0.6430079936981201
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,8,128,1,fp8,fp8,0,0.903551975886027
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,1,128,1,float16,fp8,0,0.6212693452835083
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,48,8,128,1,float16,fp8,0,1.157311995824178
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,48,128,1,float16,float16,0,0.646069327990214
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,48,128,1,float16,fp8,0,0.6442399819691976
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,48,128,1,fp8,fp8,0,0.5270666678746542
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,1,128,1,fp8,fp8,0,0.49267200628916424
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,2,128,1,float16,float16,0,0.6414080063501993
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,2,128,1,float16,fp8,0,0.6245439847310384
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,2,128,1,fp8,fp8,0,0.4914453426996867
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,4,128,1,float16,float16,0,0.638586680094401
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,4,128,1,float16,fp8,0,0.6242613395055135
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,8,128,1,float16,float16,0,0.6425386667251587
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,4,128,1,fp8,fp8,0,0.4970453182856242
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,8,128,1,float16,fp8,0,0.6303573449452718
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,48,8,128,1,fp8,fp8,0,0.5017280181248983
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,1,128,1,float16,float16,0,0.37059199810028076
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,48,128,1,float16,float16,0,0.3725706736246745
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,48,128,1,float16,fp8,0,0.36773868401845294
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,1,128,1,float16,fp8,0,0.36318401495615643
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,48,128,1,fp8,fp8,0,0.2989013393719991
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,1,128,1,fp8,fp8,0,0.2804479996363322
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,2,128,1,float16,float16,0,0.3728426694869995
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,2,128,1,float16,fp8,0,0.3643893400828044
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,2,128,1,fp8,fp8,0,0.27846399943033856
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,4,128,1,float16,float16,0,0.37273065249125165
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,4,128,1,float16,fp8,0,0.36429333686828613
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,4,128,1,fp8,fp8,0,0.2826026678085327
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,8,128,1,float16,float16,0,0.37336532274882
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,8,128,1,fp8,fp8,0,0.28331732749938965
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,48,8,128,1,float16,fp8,0,0.367029349009196
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,1,128,1,float16,float16,0,9.736709594726562
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,1,128,1,fp8,fp8,0,7.1977494557698565
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,1,128,1,float16,fp8,0,9.557509104410807
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,2,128,1,float16,float16,0,9.592618942260742
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,2,128,1,float16,fp8,0,9.421765645345053
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,4,128,1,float16,float16,0,9.667376200358072
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,2,128,1,fp8,fp8,0,7.183626810709636
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,4,128,1,float16,fp8,0,9.403578440348307
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,1,128,1,float16,float16,0,4.629616101582845
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,4,128,1,fp8,fp8,0,7.304010391235352
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,8,128,1,float16,float16,0,9.747002919514975
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,1,128,1,float16,fp8,0,4.624677340189616
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,8,128,1,fp8,fp8,0,7.302757263183594
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,48,8,128,1,float16,fp8,0,9.536362965901693
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,48,128,1,float16,float16,0,4.988800048828125
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,48,128,1,float16,fp8,0,4.959328015645345
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,1,128,1,fp8,fp8,0,3.5805066426595054
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,48,128,1,fp8,fp8,0,3.9437920252482095
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,2,128,1,float16,float16,0,4.671706517537435
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,2,128,1,float16,fp8,0,4.505663871765137
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,2,128,1,fp8,fp8,0,3.589946746826172
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,4,128,1,float16,float16,0,4.7715145746866865
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,4,128,1,float16,fp8,0,4.497498512268066
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,4,128,1,fp8,fp8,0,3.616154670715332
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,8,128,1,float16,float16,0,4.785402615865071
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,1,128,1,float16,float16,0,2.3000693321228027
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,8,128,1,float16,fp8,0,4.5547787348429365
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,1,128,1,float16,fp8,0,2.252789338429769
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,48,8,128,1,fp8,fp8,0,3.647162755330404
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,1,128,1,fp8,fp8,0,1.8024214108784993
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,48,128,1,float16,float16,0,2.4494239489237466
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,2,128,1,float16,float16,0,2.308805306752523
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,48,128,1,float16,fp8,0,2.3982826868693032
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,2,128,1,float16,fp8,0,2.244032065073649
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,48,128,1,fp8,fp8,0,2.0073973337809243
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,2,128,1,fp8,fp8,0,1.8058773676554363
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,4,128,1,float16,float16,0,2.307706673940023
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,4,128,1,float16,fp8,0,2.2695520718892417
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,4,128,1,fp8,fp8,0,1.820042610168457
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,8,128,1,float16,float16,0,2.328213373819987
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,1,128,1,float16,float16,0,1.193008025487264
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,8,128,1,fp8,fp8,0,1.835525353749593
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,48,8,128,1,float16,fp8,0,2.2765013376871743
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,48,128,1,float16,float16,0,1.2435359954833984
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,48,128,1,float16,fp8,0,1.23909330368042
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,1,128,1,float16,fp8,0,1.1618399620056152
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,1,128,1,fp8,fp8,0,0.9353546301523844
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,48,128,1,fp8,fp8,0,1.039306640625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,2,128,1,float16,float16,0,1.1924693584442139
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,2,128,1,float16,fp8,0,1.1586346626281738
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,2,128,1,fp8,fp8,0,0.9365599950154623
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,4,128,1,float16,float16,0,1.1900800069173176
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,4,128,1,float16,fp8,0,1.1671786308288574
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,4,128,1,fp8,fp8,0,0.9452533721923828
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,8,128,1,float16,float16,0,1.2056586742401123
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,1,128,1,float16,float16,0,0.6310346523920695
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,8,128,1,float16,fp8,0,1.1748639742533367
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,48,8,128,1,fp8,fp8,0,0.9583840370178223
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,1,128,1,float16,fp8,0,0.6171199878056844
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,48,128,1,float16,fp8,0,0.6455733378728231
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,48,128,1,float16,float16,0,0.6488426526387533
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,48,128,1,fp8,fp8,0,0.5576106707255045
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,1,128,1,fp8,fp8,0,0.5071093241373698
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,2,128,1,float16,fp8,0,0.6174079974492391
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,2,128,1,float16,float16,0,0.6328746477762858
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,2,128,1,fp8,fp8,0,0.5055626630783081
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,4,128,1,float16,float16,0,0.6346133152643839
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,4,128,1,float16,fp8,0,0.6184213161468506
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,4,128,1,fp8,fp8,0,0.5058293342590332
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,8,128,1,float16,float16,0,0.6352320114771525
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,8,128,1,float16,fp8,0,0.6251360177993774
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,48,8,128,1,fp8,fp8,0,0.5111680030822754
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,48,128,1,float16,float16,0,0.36268266042073566
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,48,128,1,float16,fp8,0,0.35868799686431885
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,1,128,1,float16,float16,0,0.35231467088063556
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,1,128,1,float16,fp8,0,0.34255464871724445
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,1,128,1,fp8,fp8,0,0.27427200476328534
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,2,128,1,float16,float16,0,0.3535786469777425
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,48,128,1,fp8,fp8,0,0.3107306758562724
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,2,128,1,float16,fp8,0,0.3439360062281291
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,2,128,1,fp8,fp8,0,0.2760266661643982
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,4,128,1,float16,float16,0,0.35606932640075684
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,4,128,1,float16,fp8,0,0.3453173240025838
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,4,128,1,fp8,fp8,0,0.27594133218129474
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,8,128,1,float16,float16,0,0.3549813429514567
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,8,128,1,float16,fp8,0,0.34828801949818927
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,48,128,1,float16,float16,0,0.2059040069580078
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,48,8,128,1,fp8,fp8,0,0.2808693250020345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,1,128,1,float16,float16,0,0.1966879963874817
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,48,128,1,float16,fp8,0,0.2032853364944458
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,48,128,1,fp8,fp8,0,0.1795413295427958
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,1,128,1,float16,fp8,0,0.19104532400767008
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,1,128,1,fp8,fp8,0,0.16524266203244528
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,2,128,1,float16,float16,0,0.19755200544993082
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,2,128,1,float16,fp8,0,0.1912426749865214
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,4,128,1,float16,float16,0,0.19704532623291016
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,4,128,1,float16,fp8,0,0.19247466325759888
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,2,128,1,fp8,fp8,0,0.1662986675898234
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,8,128,1,float16,float16,0,0.19778132438659668
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,8,128,1,float16,fp8,0,0.1927093267440796
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,4,128,1,fp8,fp8,0,0.16691199938456217
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,48,8,128,1,fp8,fp8,0,0.1684373418490092
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,1,128,1,float16,float16,0,6.035941441853841
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,1,128,1,fp8,fp8,0,4.812623977661133
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,2,128,1,fp8,fp8,0,4.827781359354655
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,2,128,1,float16,fp8,0,5.798074722290039
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,2,128,1,float16,float16,0,6.0687306722005205
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,1,128,1,float16,fp8,0,5.929877599080403
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,4,128,1,float16,fp8,0,5.960010528564453
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,4,128,1,float16,float16,0,6.1084747314453125
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,1,128,1,float16,float16,0,2.967365264892578
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,4,128,1,fp8,fp8,0,4.847135861714681
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,1,128,1,float16,fp8,0,2.8702878952026367
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,8,128,1,float16,float16,0,6.176853179931641
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,8,128,1,float16,fp8,0,6.156202952067058
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,48,8,128,1,fp8,fp8,0,4.912986755371094
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,48,128,1,float16,float16,0,3.2084426879882812
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,48,128,1,fp8,fp8,0,2.7123467127482095
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,1,128,1,fp8,fp8,0,2.401007970174154
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,48,128,1,float16,fp8,0,3.116607983907064
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,2,128,1,float16,float16,0,2.969418525695801
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,2,128,1,float16,fp8,0,2.8756160736083984
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,2,128,1,fp8,fp8,0,2.4066346486409507
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,4,128,1,float16,float16,0,2.967322667439779
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,4,128,1,fp8,fp8,0,2.4288105964660645
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,4,128,1,float16,fp8,0,2.8962879180908203
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,8,128,1,float16,float16,0,2.988133430480957
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,8,128,1,float16,fp8,0,2.8996639251708984
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,1,128,1,float16,float16,0,1.4974239667256672
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,48,8,128,1,fp8,fp8,0,2.460319995880127
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,1,128,1,float16,fp8,0,1.4566879272460938
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,48,128,1,float16,float16,0,1.6046026547749836
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,1,128,1,fp8,fp8,0,1.2225653330485027
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,48,128,1,float16,fp8,0,1.5750986735026042
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,2,128,1,float16,float16,0,1.4935626983642578
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,48,128,1,fp8,fp8,0,1.3807627360026042
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,2,128,1,float16,fp8,0,1.4609920183817546
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,2,128,1,fp8,fp8,0,1.2245653470357258
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,4,128,1,float16,float16,0,1.5046292940775554
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,4,128,1,float16,fp8,0,1.4654879570007324
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,4,128,1,fp8,fp8,0,1.2277493476867676
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,8,128,1,float16,float16,0,1.510639985402425
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,8,128,1,float16,fp8,0,1.4756372769673665
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,1,128,1,float16,float16,0,0.7793920040130615
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,1,128,1,float16,fp8,0,0.7555999755859375
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,48,8,128,1,fp8,fp8,0,1.2444053490956624
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,48,128,1,float16,fp8,0,0.8166613578796387
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,48,128,1,float16,float16,0,0.821722666422526
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,1,128,1,fp8,fp8,0,0.6371466716130575
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,2,128,1,float16,float16,0,0.7802293300628662
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,48,128,1,fp8,fp8,0,0.7248746554056803
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,2,128,1,float16,fp8,0,0.7593333721160889
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,2,128,1,fp8,fp8,0,0.6408799886703491
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,4,128,1,float16,float16,0,0.7806133429209391
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,4,128,1,float16,fp8,0,0.7642026742299398
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,4,128,1,fp8,fp8,0,0.6468533277511597
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,8,128,1,float16,float16,0,0.7860799630482992
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,8,128,1,float16,fp8,0,0.7691786289215088
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,1,128,1,float16,float16,0,0.4217280149459839
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,1,128,1,float16,fp8,0,0.4078506628672282
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,48,128,1,float16,float16,0,0.4397546847661336
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,48,8,128,1,fp8,fp8,0,0.654858668645223
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,48,128,1,float16,fp8,0,0.43247465292612713
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,1,128,1,fp8,fp8,0,0.34648001194000244
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,48,128,1,fp8,fp8,0,0.3906506697336833
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,2,128,1,float16,fp8,0,0.41052265961964923
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,2,128,1,float16,float16,0,0.41940800348917645
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,2,128,1,fp8,fp8,0,0.34918399651845294
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,4,128,1,float16,float16,0,0.4223039944966634
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,4,128,1,fp8,fp8,0,0.35120534896850586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,4,128,1,float16,fp8,0,0.411141316095988
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,8,128,1,float16,float16,0,0.4227786858876546
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,8,128,1,float16,fp8,0,0.4121439854303996
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,48,8,128,1,fp8,fp8,0,0.35585065682729083
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,48,128,1,float16,float16,0,0.24734399716059366
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,1,128,1,float16,float16,0,0.23795199394226074
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,48,128,1,float16,fp8,0,0.2448586622873942
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,1,128,1,float16,fp8,0,0.23117866118748984
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,1,128,1,fp8,fp8,0,0.19089599450429282
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,48,128,1,fp8,fp8,0,0.21475734313329062
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,2,128,1,float16,float16,0,0.2378879984219869
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,2,128,1,float16,fp8,0,0.23138666152954102
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,2,128,1,fp8,fp8,0,0.19109867016474405
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,4,128,1,float16,float16,0,0.24001065889994302
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,4,128,1,float16,fp8,0,0.23318399985631308
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,4,128,1,fp8,fp8,0,0.19364267587661743
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,8,128,1,float16,fp8,0,0.23568532864252725
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,8,128,1,float16,float16,0,0.24093866348266602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,48,8,128,1,fp8,fp8,0,0.19458667437235513
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,48,128,1,float16,float16,0,0.14320000012715658
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,48,128,1,float16,fp8,0,0.1406773328781128
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,1,128,1,float16,float16,0,0.13661866386731467
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,1,128,1,float16,fp8,0,0.13332266608874002
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,48,128,1,fp8,fp8,0,0.12898666659990946
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,1,128,1,fp8,fp8,0,0.11715733011563619
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,2,128,1,float16,fp8,0,0.13346133629480997
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,2,128,1,float16,float16,0,0.13621866703033447
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,2,128,1,fp8,fp8,0,0.11687999963760376
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,4,128,1,float16,float16,0,0.13618666927019754
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,4,128,1,float16,fp8,0,0.13421333829561868
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,8,128,1,float16,float16,0,0.13713600238164267
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,8,128,1,float16,fp8,0,0.13339733084042868
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,8,128,1,fp8,fp8,0,0.12146666646003723
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,48,4,128,1,fp8,fp8,0,0.11938666303952535
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,1,128,1,float16,fp8,0,6.021045049031575
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,1,128,1,float16,float16,0,5.941973368326823
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,2,128,1,float16,float16,0,5.881178538004558
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,2,128,1,fp8,fp8,0,4.980218569437663
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,2,128,1,float16,fp8,0,5.857797622680664
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,1,128,1,fp8,fp8,0,4.8410186767578125
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,4,128,1,float16,float16,0,6.035594940185547
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,4,128,1,float16,fp8,0,6.2024586995442705
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,1,128,1,float16,float16,0,2.864800135294596
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,4,128,1,fp8,fp8,0,5.44163703918457
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,8,128,1,float16,float16,0,6.344816207885742
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,8,128,1,fp8,fp8,0,5.483786900838216
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,48,8,128,1,float16,fp8,0,6.1708424886067705
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,1,128,1,float16,fp8,0,2.8437280654907227
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,48,128,1,float16,float16,0,3.323024113972982
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,48,128,1,float16,fp8,0,3.245690663655599
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,1,128,1,fp8,fp8,0,2.4271252950032554
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,2,128,1,float16,float16,0,2.913562774658203
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,48,128,1,fp8,fp8,0,2.805381457010905
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,2,128,1,fp8,fp8,0,2.4932106335957847
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,2,128,1,float16,fp8,0,2.9006080627441406
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,4,128,1,float16,fp8,0,3.028479894002279
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,4,128,1,float16,float16,0,3.020869255065918
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,4,128,1,fp8,fp8,0,2.71562131245931
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,8,128,1,float16,float16,0,3.0460373560587564
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,1,128,1,float16,fp8,0,1.4299306869506836
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,1,128,1,float16,float16,0,1.4361440340677898
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,8,128,1,float16,fp8,0,3.089285214742025
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,48,8,128,1,fp8,fp8,0,2.7430454889933267
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,1,128,1,fp8,fp8,0,1.2242720127105713
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,48,128,1,float16,float16,0,1.6665120124816895
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,48,128,1,float16,fp8,0,1.6257440249125164
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,2,128,1,float16,float16,0,1.4483307202657063
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,2,128,1,fp8,fp8,0,1.247477372487386
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,2,128,1,float16,fp8,0,1.446133295694987
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,48,128,1,fp8,fp8,0,1.4007199605305989
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,4,128,1,float16,float16,0,1.5105706850687664
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,4,128,1,float16,fp8,0,1.518613338470459
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,4,128,1,fp8,fp8,0,1.3710719744364421
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,8,128,1,float16,float16,0,1.5172799428304036
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,8,128,1,float16,fp8,0,1.5352800687154133
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,1,128,1,float16,float16,0,0.7319466272989908
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,1,128,1,float16,fp8,0,0.731386661529541
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,1,128,1,fp8,fp8,0,0.6127306620279948
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,48,8,128,1,fp8,fp8,0,1.3811519940694172
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,48,128,1,float16,fp8,0,0.8135519822438558
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,48,128,1,float16,float16,0,0.8408746719360352
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,2,128,1,float16,fp8,0,0.7391040325164795
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,2,128,1,fp8,fp8,0,0.634762684504191
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,2,128,1,float16,float16,0,0.7383200327555338
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,48,128,1,fp8,fp8,0,0.7070293426513672
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,4,128,1,float16,float16,0,0.7628426551818848
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,4,128,1,float16,fp8,0,0.7564000288645426
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,4,128,1,fp8,fp8,0,0.6991893450419108
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,8,128,1,float16,float16,0,0.7669119834899902
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,8,128,1,float16,fp8,0,0.7751626968383789
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,1,128,1,float16,float16,0,0.376362681388855
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,48,8,128,1,fp8,fp8,0,0.7032639980316162
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,48,128,1,float16,float16,0,0.42787198225657147
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,48,128,1,float16,fp8,0,0.4161119858423869
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,1,128,1,float16,fp8,0,0.3773760000864665
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,48,128,1,fp8,fp8,0,0.3636373281478882
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,1,128,1,fp8,fp8,0,0.31940799951553345
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,2,128,1,float16,float16,0,0.3828746477762858
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,2,128,1,float16,fp8,0,0.3781866629918416
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,2,128,1,fp8,fp8,0,0.3260800043741862
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,4,128,1,float16,float16,0,0.3911626736323039
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,4,128,1,float16,fp8,0,0.39079467455546063
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,4,128,1,fp8,fp8,0,0.3571573495864868
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,8,128,1,float16,float16,0,0.3947840134302775
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,8,128,1,fp8,fp8,0,0.36321067810058594
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,48,128,1,float16,float16,0,0.2269279956817627
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,48,8,128,1,float16,fp8,0,0.3936266501744588
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,1,128,1,float16,float16,0,0.2007360061009725
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,48,128,1,float16,fp8,0,0.22373332579930624
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,1,128,1,float16,fp8,0,0.2018079956372579
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,48,128,1,fp8,fp8,0,0.18997865915298462
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,1,128,1,fp8,fp8,0,0.15562132994333902
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,2,128,1,float16,float16,0,0.20300267140070596
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,2,128,1,float16,fp8,0,0.20206934213638306
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,4,128,1,float16,fp8,0,0.20747200647989908
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,2,128,1,fp8,fp8,0,0.1584160029888153
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,4,128,1,float16,float16,0,0.20666666825612387
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,4,128,1,fp8,fp8,0,0.1750453313191732
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,8,128,1,float16,float16,0,0.21104532480239868
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,8,128,1,float16,fp8,0,0.20863999923070273
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,48,8,128,1,fp8,fp8,0,0.17941333850224814
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,1,128,1,float16,float16,0,0.10665599505106609
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,1,128,1,float16,fp8,0,0.1074133316675822
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,48,128,1,float16,float16,0,0.1251146694024404
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,1,128,1,fp8,fp8,0,0.08552533388137817
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,48,128,1,float16,fp8,0,0.12310933073361714
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,48,128,1,fp8,fp8,0,0.10162132978439331
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,2,128,1,float16,float16,0,0.10728533069292705
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,2,128,1,float16,fp8,0,0.10754666725794475
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,2,128,1,fp8,fp8,0,0.08689600229263306
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,4,128,1,float16,float16,0,0.11013866464296977
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,4,128,1,fp8,fp8,0,0.09317866961161296
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,4,128,1,float16,fp8,0,0.10983467102050781
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,8,128,1,float16,float16,0,0.11014399925867717
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,8,128,1,fp8,fp8,0,0.09537600477536519
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,48,8,128,1,float16,fp8,0,0.1111840009689331
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,48,128,1,float16,float16,0,0.06638399759928386
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,48,128,1,float16,fp8,0,0.06483733157316844
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,1,128,1,float16,float16,0,0.058261334896087646
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,48,128,1,fp8,fp8,0,0.05945600072542826
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,1,128,1,float16,fp8,0,0.05821333328882853
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,1,128,1,fp8,fp8,0,0.04911999901135763
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,2,128,1,float16,float16,0,0.05891199906667074
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,2,128,1,float16,fp8,0,0.05876266459623972
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,2,128,1,fp8,fp8,0,0.049866666396458946
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,4,128,1,float16,float16,0,0.05986666679382324
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,4,128,1,float16,fp8,0,0.05982933441797892
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,4,128,1,fp8,fp8,0,0.053226664662361145
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,8,128,1,float16,float16,0,0.060138667623202004
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,8,128,1,fp8,fp8,0,0.05397866666316986
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,48,8,128,1,float16,fp8,0,0.061237335205078125
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,1,128,1,float16,fp8,0,4.522960027058919
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,2,128,1,float16,float16,0,4.6091359456380205
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,2,128,1,float16,fp8,0,4.609231948852539
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,2,128,1,fp8,fp8,0,4.174213409423828
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,1,128,1,fp8,fp8,0,4.0552107493082685
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,1,128,1,float16,float16,0,4.551461219787598
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,4,128,1,float16,float16,0,4.792458534240723
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,4,128,1,float16,fp8,0,4.779631932576497
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,1,128,1,float16,float16,0,2.216554641723633
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,4,128,1,fp8,fp8,0,4.642938613891602
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,1,128,1,float16,fp8,0,2.213808059692383
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,8,128,1,float16,float16,0,4.826885223388672
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,8,128,1,fp8,fp8,0,4.674223899841309
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,48,128,1,float16,float16,0,2.707104047139486
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,48,8,128,1,float16,fp8,0,4.901498794555664
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,48,128,1,fp8,fp8,0,2.406533400217692
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,1,128,1,fp8,fp8,0,2.0235519409179688
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,48,128,1,float16,fp8,0,2.651818593343099
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,2,128,1,float16,float16,0,2.2888639767964682
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,2,128,1,fp8,fp8,0,2.0838239987691245
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,2,128,1,float16,fp8,0,2.282303969065348
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,4,128,1,float16,float16,0,2.39845863978068
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,4,128,1,fp8,fp8,0,2.30838934580485
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,4,128,1,float16,fp8,0,2.393738587697347
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,8,128,1,float16,float16,0,2.4168853759765625
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,8,128,1,float16,fp8,0,2.4412479400634766
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,1,128,1,float16,float16,0,1.122762680053711
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,48,8,128,1,fp8,fp8,0,2.34334929784139
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,1,128,1,float16,fp8,0,1.1171733538309734
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,1,128,1,fp8,fp8,0,1.0116053422292073
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,48,128,1,float16,fp8,0,1.3141120274861653
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,48,128,1,float16,float16,0,1.3384265899658203
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,2,128,1,float16,fp8,0,1.1321333249409993
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,2,128,1,float16,float16,0,1.1345226764678955
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,2,128,1,fp8,fp8,0,1.0496373176574707
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,4,128,1,float16,float16,0,1.1965546607971191
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,48,128,1,fp8,fp8,0,1.2058026790618896
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,4,128,1,float16,fp8,0,1.1928160190582275
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,4,128,1,fp8,fp8,0,1.1601119836171467
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,8,128,1,float16,float16,0,1.2017227013905842
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,1,128,1,float16,float16,0,0.573802669843038
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,48,128,1,float16,float16,0,0.6794453461964926
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,8,128,1,fp8,fp8,0,1.1828052997589111
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,1,128,1,float16,fp8,0,0.5693600177764893
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,48,128,1,float16,fp8,0,0.657690684000651
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,1,128,1,fp8,fp8,0,0.5068426529566447
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,48,128,1,fp8,fp8,0,0.6066399812698364
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,48,8,128,1,float16,fp8,0,1.2191466490427654
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,2,128,1,float16,float16,0,0.5803786516189575
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,2,128,1,float16,fp8,0,0.5786986748377482
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,2,128,1,fp8,fp8,0,0.5347253481547037
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,4,128,1,float16,float16,0,0.5978986819585165
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,4,128,1,float16,fp8,0,0.5964800119400024
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,4,128,1,fp8,fp8,0,0.5892906586329142
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,8,128,1,float16,float16,0,0.6036693255106608
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,8,128,1,float16,fp8,0,0.6086399952570597
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,1,128,1,float16,float16,0,0.2937493324279785
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,48,8,128,1,fp8,fp8,0,0.5978506803512573
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,1,128,1,float16,fp8,0,0.29257599512736004
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,1,128,1,fp8,fp8,0,0.261189341545105
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,48,128,1,float16,float16,0,0.3515466849009196
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,48,128,1,float16,fp8,0,0.34392531712849933
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,2,128,1,float16,fp8,0,0.297487994035085
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,2,128,1,fp8,fp8,0,0.26898666222890216
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,2,128,1,float16,float16,0,0.2970133423805237
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,48,128,1,fp8,fp8,0,0.31082133452097577
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,4,128,1,float16,float16,0,0.30701865752538043
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,4,128,1,float16,fp8,0,0.307584007581075
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,4,128,1,fp8,fp8,0,0.30154667297999066
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,8,128,1,float16,float16,0,0.31226666768391925
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,8,128,1,float16,fp8,0,0.3108746608098348
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,48,8,128,1,fp8,fp8,0,0.3072533408800761
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,48,128,1,float16,float16,0,0.18722132841746011
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,48,128,1,float16,fp8,0,0.18144534031550089
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,1,128,1,float16,float16,0,0.15705066919326782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,1,128,1,float16,fp8,0,0.1579093337059021
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,48,128,1,fp8,fp8,0,0.16275200247764587
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,1,128,1,fp8,fp8,0,0.12972266475359598
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,2,128,1,float16,float16,0,0.15846932927767435
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,2,128,1,float16,fp8,0,0.15819733341534933
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,2,128,1,fp8,fp8,0,0.13210133711496988
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,4,128,1,float16,float16,0,0.16290666659673056
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,4,128,1,float16,fp8,0,0.1627946694691976
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,4,128,1,fp8,fp8,0,0.14842133720715842
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,8,128,1,float16,float16,0,0.1664959987004598
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,8,128,1,float16,fp8,0,0.16459199786186218
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,48,8,128,1,fp8,fp8,0,0.15179733435312906
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,1,128,1,float16,float16,0,0.08674666285514832
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,48,128,1,float16,fp8,0,0.10292800267537434
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,48,128,1,float16,float16,0,0.10525866349538167
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,48,128,1,fp8,fp8,0,0.08734400073687236
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,1,128,1,float16,fp8,0,0.08716799815495808
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,1,128,1,fp8,fp8,0,0.07217066486676534
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,2,128,1,float16,float16,0,0.08764800429344177
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,2,128,1,float16,fp8,0,0.08685866991678874
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,2,128,1,fp8,fp8,0,0.07381866872310638
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,4,128,1,float16,fp8,0,0.09016000231107076
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,4,128,1,float16,float16,0,0.08948266506195068
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,4,128,1,fp8,fp8,0,0.07905599971612294
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,8,128,1,float16,float16,0,0.08975467085838318
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,8,128,1,float16,fp8,0,0.08922666311264038
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,48,8,128,1,fp8,fp8,0,0.08186133205890656
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,48,128,1,float16,float16,0,0.05698133508364359
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,48,128,1,float16,fp8,0,0.05594133337338766
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,1,128,1,float16,fp8,0,0.04892266790072123
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,1,128,1,float16,float16,0,0.048613334695498146
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,1,128,1,fp8,fp8,0,0.04301333427429199
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,48,128,1,fp8,fp8,0,0.05291733145713806
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,2,128,1,float16,float16,0,0.0488373339176178
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,2,128,1,float16,fp8,0,0.04938133557637533
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,2,128,1,fp8,fp8,0,0.043103997906049095
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,4,128,1,float16,fp8,0,0.050474668542544045
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,4,128,1,float16,float16,0,0.05036266644795736
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,8,128,1,float16,float16,0,0.05049066742261251
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,8,128,1,float16,fp8,0,0.05050133168697357
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,4,128,1,fp8,fp8,0,0.046666666865348816
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,48,8,128,1,fp8,fp8,0,0.04730133215586344
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,48,128,1,float16,float16,0,0.03329599897066752
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,48,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,1,128,1,float16,float16,0,0.03102933367093404
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,48,128,1,fp8,fp8,0,0.03254399945338567
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,1,128,1,float16,fp8,0,0.030928000807762146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,1,128,1,fp8,fp8,0,0.02905600021282832
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,2,128,1,float16,float16,0,0.031258667508761086
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,2,128,1,float16,fp8,0,0.031136001149813335
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,2,128,1,fp8,fp8,0,0.029605334003766377
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,4,128,1,float16,float16,0,0.031727999448776245
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,4,128,1,fp8,fp8,0,0.031285333136717476
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,8,128,1,float16,float16,0,0.03183466692765554
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,8,128,1,float16,fp8,0,0.031930667658646904
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,8,128,1,fp8,fp8,0,0.031157332162062328
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,48,4,128,1,float16,fp8,0,0.03219199925661087
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,1,128,1,float16,float16,0,1.9118293126424153
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,1,128,1,float16,fp8,0,1.899295965830485
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,1,128,1,fp8,fp8,0,1.6608853340148926
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,2,128,1,float16,float16,0,1.988810698191325
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,2,128,1,fp8,fp8,0,1.7277812957763672
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,2,128,1,float16,fp8,0,1.9645013809204102
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,4,128,1,float16,fp8,0,2.0825279553731284
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,4,128,1,float16,float16,0,2.086341381072998
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,1,128,1,float16,float16,0,0.9670720100402832
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,4,128,1,fp8,fp8,0,1.9665865898132324
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,8,128,1,float16,fp8,0,2.1215573946634927
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,8,128,1,fp8,fp8,0,2.028085390726725
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,48,8,128,1,float16,float16,0,2.093514601389567
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,1,128,1,float16,fp8,0,0.9606293042500814
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,48,128,1,float16,float16,0,1.2076480388641357
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,48,128,1,float16,fp8,0,1.1748053232828777
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,1,128,1,fp8,fp8,0,0.8261866569519043
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,48,128,1,fp8,fp8,0,1.0433066685994465
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,2,128,1,float16,fp8,0,0.9734826882680258
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,2,128,1,float16,float16,0,0.9817012945810953
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,2,128,1,fp8,fp8,0,0.8737706343332926
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,4,128,1,float16,float16,0,1.0431733131408691
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,4,128,1,float16,fp8,0,1.0366506576538086
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,4,128,1,fp8,fp8,0,0.989029328028361
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,8,128,1,float16,float16,0,1.0413813591003418
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,1,128,1,float16,float16,0,0.49380799134572345
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,8,128,1,float16,fp8,0,1.0616106986999512
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,1,128,1,float16,fp8,0,0.4910080035527547
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,48,8,128,1,fp8,fp8,0,1.0188159942626953
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,48,128,1,float16,float16,0,0.6048213243484497
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,1,128,1,fp8,fp8,0,0.4177279869715373
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,2,128,1,float16,float16,0,0.5013759930928549
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,48,128,1,fp8,fp8,0,0.5265493392944336
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,48,128,1,float16,fp8,0,0.5907093286514282
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,2,128,1,fp8,fp8,0,0.44465601444244385
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,2,128,1,float16,fp8,0,0.4964746634165446
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,4,128,1,float16,float16,0,0.5196266571680704
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,4,128,1,float16,fp8,0,0.517034649848938
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,4,128,1,fp8,fp8,0,0.4998133182525635
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,8,128,1,float16,float16,0,0.5258880058924357
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,8,128,1,float16,fp8,0,0.5300266742706299
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,48,8,128,1,fp8,fp8,0,0.5155306657155355
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,48,128,1,float16,float16,0,0.31375465790430707
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,1,128,1,float16,float16,0,0.25332266092300415
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,48,128,1,float16,fp8,0,0.30347732702891034
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,1,128,1,float16,fp8,0,0.252074658870697
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,1,128,1,fp8,fp8,0,0.21784534056981406
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,2,128,1,float16,fp8,0,0.2566133340199788
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,2,128,1,float16,float16,0,0.2564799984296163
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,48,128,1,fp8,fp8,0,0.2712053259213765
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,2,128,1,fp8,fp8,0,0.22578134139378866
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,4,128,1,float16,float16,0,0.26658666133880615
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,4,128,1,float16,fp8,0,0.2648213307062785
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,4,128,1,fp8,fp8,0,0.2579786578814189
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,8,128,1,float16,float16,0,0.27035733064015705
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,8,128,1,fp8,fp8,0,0.265882670879364
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,48,8,128,1,float16,fp8,0,0.26923733949661255
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,1,128,1,float16,float16,0,0.13596266508102417
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,1,128,1,float16,fp8,0,0.13570132851600647
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,48,128,1,float16,float16,0,0.1660480002562205
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,48,128,1,float16,fp8,0,0.16266666849454245
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,48,128,1,fp8,fp8,0,0.14959999918937683
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,1,128,1,fp8,fp8,0,0.11739200353622437
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,2,128,1,float16,float16,0,0.13737600048383078
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,2,128,1,float16,fp8,0,0.1366933286190033
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,2,128,1,fp8,fp8,0,0.11895466844240825
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,4,128,1,float16,float16,0,0.14131733775138855
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,4,128,1,float16,fp8,0,0.14089066783587137
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,4,128,1,fp8,fp8,0,0.1350879967212677
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,8,128,1,float16,float16,0,0.14494933684666952
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,8,128,1,float16,fp8,0,0.14366400241851807
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,1,128,1,float16,float16,0,0.07668266693751018
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,1,128,1,float16,fp8,0,0.07713599999745686
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,48,8,128,1,fp8,fp8,0,0.1383840044339498
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,48,128,1,float16,float16,0,0.09752532839775085
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,48,128,1,fp8,fp8,0,0.08170133332411449
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,48,128,1,float16,fp8,0,0.09340799848238628
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,1,128,1,fp8,fp8,0,0.06584000090758006
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,2,128,1,float16,float16,0,0.07789866626262665
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,2,128,1,float16,fp8,0,0.07760000228881836
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,2,128,1,fp8,fp8,0,0.06716266771157582
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,4,128,1,float16,fp8,0,0.08157333234945933
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,4,128,1,float16,float16,0,0.08003733555475871
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,4,128,1,fp8,fp8,0,0.07207466661930084
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,8,128,1,float16,float16,0,0.08051200211048126
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,8,128,1,float16,fp8,0,0.08065600196520488
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,48,8,128,1,fp8,fp8,0,0.07498666644096375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,1,128,1,float16,float16,0,0.043893332282702126
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,1,128,1,float16,fp8,0,0.04345599810282389
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,48,128,1,float16,float16,0,0.051728000243504844
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,48,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,48,128,1,fp8,fp8,0,0.04776533444722494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,1,128,1,fp8,fp8,0,0.03799466788768768
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,2,128,1,float16,float16,0,0.04426133135954539
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,2,128,1,float16,fp8,0,0.044112001856168113
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,2,128,1,fp8,fp8,0,0.038831998904546104
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,4,128,1,float16,float16,0,0.04558399816354116
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,4,128,1,float16,fp8,0,0.04554666578769684
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,4,128,1,fp8,fp8,0,0.042175998290379844
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,8,128,1,float16,fp8,0,0.0458186666170756
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,8,128,1,fp8,fp8,0,0.04277333120505015
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,48,8,128,1,float16,float16,0,0.04608533283074697
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,48,128,1,float16,float16,0,0.03183999905983607
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,1,128,1,float16,float16,0,0.02997333308060964
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,48,128,1,float16,fp8,0,0.03215999901294708
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,1,128,1,float16,fp8,0,0.02998399982849757
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,48,128,1,fp8,fp8,0,0.03029866764942805
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,1,128,1,fp8,fp8,0,0.02665599932273229
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,2,128,1,float16,float16,0,0.02997333308060964
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,2,128,1,fp8,fp8,0,0.02735999971628189
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,4,128,1,float16,float16,0,0.030837332208951313
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,4,128,1,fp8,fp8,0,0.029088000456492107
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,4,128,1,float16,fp8,0,0.031125334401925404
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,8,128,1,float16,float16,0,0.030591999491055805
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,2,128,1,float16,fp8,0,0.030320001145203907
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,8,128,1,float16,fp8,0,0.03092266619205475
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,48,8,128,1,fp8,fp8,0,0.029018667836983997
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,48,128,1,float16,float16,0,0.02310933421055476
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,48,128,1,float16,fp8,0,0.02342933416366577
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,48,128,1,fp8,fp8,0,0.022853332261244457
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,1,128,1,float16,fp8,0,0.022090665996074677
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,1,128,1,float16,float16,0,0.02231466770172119
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,2,128,1,float16,float16,0,0.021909333765506744
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,1,128,1,fp8,fp8,0,0.020469332734743755
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,2,128,1,float16,fp8,0,0.022672000030676525
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,2,128,1,fp8,fp8,0,0.020725333442290623
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,4,128,1,float16,float16,0,0.0230880007147789
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,4,128,1,float16,fp8,0,0.02309866746266683
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,8,128,1,float16,float16,0,0.023050665855407715
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,4,128,1,fp8,fp8,0,0.02142400046189626
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,8,128,1,float16,fp8,0,0.023103999594847362
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,48,8,128,1,fp8,fp8,0,0.021749332547187805
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,1,128,1,float16,float16,0,0.8828960259755453
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,1,128,1,float16,fp8,0,0.8805226484934489
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,1,128,1,fp8,fp8,0,0.8405333360036215
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,2,128,1,float16,float16,0,0.905839999516805
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,2,128,1,fp8,fp8,0,0.863589366277059
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,2,128,1,float16,fp8,0,0.9052533308664957
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,4,128,1,float16,float16,0,0.9655146598815918
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,4,128,1,float16,fp8,0,0.960373322168986
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,1,128,1,float16,float16,0,0.4538400173187256
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,4,128,1,fp8,fp8,0,0.9977066516876221
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,8,128,1,float16,float16,0,0.9762773513793945
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,8,128,1,float16,fp8,0,0.9740426540374756
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,48,8,128,1,fp8,fp8,0,1.019978682200114
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,1,128,1,float16,fp8,0,0.45154134432474774
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,48,128,1,float16,float16,0,0.5790719985961914
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,48,128,1,float16,fp8,0,0.5631733338038126
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,1,128,1,fp8,fp8,0,0.4171573321024577
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,48,128,1,fp8,fp8,0,0.5218559900919596
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,2,128,1,float16,float16,0,0.46344534556070965
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,2,128,1,float16,fp8,0,0.4621173143386841
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,2,128,1,fp8,fp8,0,0.43909335136413574
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,4,128,1,float16,float16,0,0.48844265937805176
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,4,128,1,float16,fp8,0,0.4811199903488159
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,4,128,1,fp8,fp8,0,0.503167986869812
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,8,128,1,float16,fp8,0,0.4853546619415283
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,8,128,1,float16,float16,0,0.4898933172225952
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,1,128,1,float16,float16,0,0.23491734266281128
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,48,8,128,1,fp8,fp8,0,0.5164159933725992
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,1,128,1,float16,fp8,0,0.23512534300486246
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,48,128,1,float16,float16,0,0.30687467257181805
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,48,128,1,float16,fp8,0,0.2970079978307088
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,1,128,1,fp8,fp8,0,0.21630932887395224
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,2,128,1,float16,float16,0,0.24060799678166708
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,48,128,1,fp8,fp8,0,0.2718399961789449
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,2,128,1,float16,fp8,0,0.23972266912460327
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,2,128,1,fp8,fp8,0,0.22485866149266562
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,4,128,1,float16,float16,0,0.2520693341890971
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,4,128,1,float16,fp8,0,0.2507839997609456
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,4,128,1,fp8,fp8,0,0.2568746606508891
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,8,128,1,float16,float16,0,0.2516106764475505
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,8,128,1,float16,fp8,0,0.25008533398310345
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,48,8,128,1,fp8,fp8,0,0.265338659286499
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,48,128,1,float16,float16,0,0.16499200463294983
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,1,128,1,float16,float16,0,0.12829867005348206
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,48,128,1,float16,fp8,0,0.15972266594568887
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,1,128,1,float16,fp8,0,0.12794133027394614
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,48,128,1,fp8,fp8,0,0.14516266187032065
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,1,128,1,fp8,fp8,0,0.11600533127784729
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,2,128,1,float16,float16,0,0.13083733121554056
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,2,128,1,float16,fp8,0,0.1316480040550232
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,2,128,1,fp8,fp8,0,0.11821867028872173
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,4,128,1,float16,float16,0,0.13570666313171387
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,4,128,1,fp8,fp8,0,0.13435733318328857
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,4,128,1,float16,fp8,0,0.13570132851600647
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,8,128,1,float16,float16,0,0.13688533504803976
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,8,128,1,float16,fp8,0,0.13621333241462708
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,48,8,128,1,fp8,fp8,0,0.13800000150998434
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,48,128,1,float16,float16,0,0.09258133172988892
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,1,128,1,float16,float16,0,0.07154666880766551
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,1,128,1,float16,fp8,0,0.07213333249092102
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,48,128,1,float16,fp8,0,0.08744000395139058
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,48,128,1,fp8,fp8,0,0.080335999528567
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,1,128,1,fp8,fp8,0,0.06572266419728597
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,2,128,1,float16,float16,0,0.07162133355935414
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,2,128,1,float16,fp8,0,0.07191466788450877
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,2,128,1,fp8,fp8,0,0.06576000154018402
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,4,128,1,float16,float16,0,0.07450133562088013
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,4,128,1,float16,fp8,0,0.07508266468842824
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,4,128,1,fp8,fp8,0,0.07208000123500824
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,8,128,1,float16,float16,0,0.07516799867153168
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,8,128,1,float16,fp8,0,0.0747626672188441
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,48,8,128,1,fp8,fp8,0,0.07457600037256877
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,48,128,1,float16,float16,0,0.05105599761009216
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,48,128,1,float16,fp8,0,0.049072002371152244
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,1,128,1,float16,float16,0,0.04124800115823746
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,1,128,1,float16,fp8,0,0.04144000013669332
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,1,128,1,fp8,fp8,0,0.037989333271980286
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,48,128,1,fp8,fp8,0,0.04721599817276001
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,2,128,1,float16,float16,0,0.04144533226887385
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,2,128,1,fp8,fp8,0,0.03851199895143509
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,2,128,1,float16,fp8,0,0.041984001795450844
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,4,128,1,float16,float16,0,0.043290664752324425
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,4,128,1,float16,fp8,0,0.042912001411120095
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,4,128,1,fp8,fp8,0,0.04197866717974345
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,8,128,1,float16,float16,0,0.04301333427429199
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,8,128,1,float16,fp8,0,0.043365334471066795
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,48,8,128,1,fp8,fp8,0,0.0429013321797053
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,48,128,1,float16,float16,0,0.0323840007185936
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,48,128,1,float16,fp8,0,0.031328000128269196
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,1,128,1,float16,float16,0,0.02899733434120814
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,48,128,1,fp8,fp8,0,0.030437332888444264
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,1,128,1,float16,fp8,0,0.029098667204380035
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,1,128,1,fp8,fp8,0,0.02697066714366277
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,2,128,1,float16,float16,0,0.02908266584078471
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,2,128,1,float16,fp8,0,0.029194665451844532
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,2,128,1,fp8,fp8,0,0.027274665733178455
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,4,128,1,float16,float16,0,0.029472000896930695
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,4,128,1,float16,fp8,0,0.030554667115211487
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,4,128,1,fp8,fp8,0,0.028981332977612812
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,8,128,1,float16,float16,0,0.02998399982849757
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,8,128,1,fp8,fp8,0,0.029109333952267964
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,48,128,1,float16,float16,0,0.02248000105222066
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,48,128,1,float16,fp8,0,0.02233600119749705
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,48,8,128,1,float16,fp8,0,0.03018666555484136
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,1,128,1,float16,fp8,0,0.02091199904680252
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,48,128,1,fp8,fp8,0,0.022842665513356526
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,1,128,1,float16,float16,0,0.020869334538777668
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,1,128,1,fp8,fp8,0,0.02035733312368393
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,2,128,1,float16,float16,0,0.020992000897725422
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,2,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,4,128,1,float16,fp8,0,0.021685334543387096
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,2,128,1,fp8,fp8,0,0.02093333254257838
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,8,128,1,float16,float16,0,0.021242665747801464
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,8,128,1,float16,fp8,0,0.021359999974568684
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,8,128,1,fp8,fp8,0,0.021690666675567627
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,4,128,1,fp8,fp8,0,0.021984001000722248
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,48,128,1,float16,float16,0,0.017850667238235474
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,48,4,128,1,float16,float16,0,0.021375998854637146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,1,128,1,float16,fp8,0,0.01749333366751671
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,1,128,1,fp8,fp8,0,0.0176959993938605
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,1,128,1,float16,float16,0,0.017263999829689663
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,48,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,2,128,1,float16,float16,0,0.017231999586025875
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,48,128,1,fp8,fp8,0,0.018298666924238205
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,2,128,1,float16,fp8,0,0.017664000391960144
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,4,128,1,float16,float16,0,0.017114666601022083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,4,128,1,float16,fp8,0,0.017952000101407368
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,4,128,1,fp8,fp8,0,0.01782400036851565
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,8,128,1,float16,fp8,0,0.017946666727463405
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,8,128,1,fp8,fp8,0,0.017637333522240322
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,8,128,1,float16,float16,0,0.017136000096797943
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,48,2,128,1,fp8,fp8,0,0.01762666677435239
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,1,128,1,float16,float16,0,0.5501439968744913
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,1,128,1,float16,fp8,0,0.5481653213500977
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,1,128,1,fp8,fp8,0,0.5690346558888754
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,2,128,1,float16,float16,0,0.5616746743520101
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,2,128,1,fp8,fp8,0,0.5910506645838419
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,2,128,1,float16,fp8,0,0.555840015411377
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,4,128,1,float16,float16,0,0.5861813227335612
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,4,128,1,float16,fp8,0,0.5832266807556152
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,4,128,1,fp8,fp8,0,0.6554133494695028
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,8,128,1,float16,float16,0,0.5888906717300415
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,1,128,1,float16,float16,0,0.2815413276354472
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,8,128,1,fp8,fp8,0,0.6673440138498942
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,48,8,128,1,float16,fp8,0,0.5936479965845743
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,48,128,1,float16,float16,0,0.32343467076619464
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,48,128,1,float16,fp8,0,0.31462399164835614
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,48,128,1,fp8,fp8,0,0.3455093304316203
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,1,128,1,float16,fp8,0,0.2816266616185506
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,1,128,1,fp8,fp8,0,0.291920006275177
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,2,128,1,float16,fp8,0,0.28489067157109577
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,2,128,1,fp8,fp8,0,0.30268265803654987
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,2,128,1,float16,float16,0,0.28595733642578125
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,4,128,1,fp8,fp8,0,0.3330666621526082
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,4,128,1,float16,fp8,0,0.2963786721229553
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,4,128,1,float16,float16,0,0.29874666531880695
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,8,128,1,float16,float16,0,0.3004320065180461
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,8,128,1,float16,fp8,0,0.29677865902582806
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,1,128,1,float16,float16,0,0.14958932995796204
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,1,128,1,float16,fp8,0,0.1500640014807383
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,48,8,128,1,fp8,fp8,0,0.33987732728322345
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,1,128,1,fp8,fp8,0,0.15612799922625223
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,48,128,1,float16,float16,0,0.1715839902559916
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,48,128,1,fp8,fp8,0,0.18116267522176108
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,48,128,1,float16,fp8,0,0.16637333234151205
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,2,128,1,float16,float16,0,0.15085867047309875
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,2,128,1,float16,fp8,0,0.15149333079655966
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,2,128,1,fp8,fp8,0,0.1593173344930013
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,4,128,1,float16,float16,0,0.15786133209864298
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,4,128,1,float16,fp8,0,0.15660267074902853
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,4,128,1,fp8,fp8,0,0.174234668413798
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,8,128,1,float16,float16,0,0.15937599539756775
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,8,128,1,float16,fp8,0,0.15858667095502219
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,48,8,128,1,fp8,fp8,0,0.17639466126759848
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,48,128,1,float16,float16,0,0.09293333689371745
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,48,128,1,float16,fp8,0,0.09039466579755147
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,48,128,1,fp8,fp8,0,0.10063466429710388
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,1,128,1,float16,fp8,0,0.08228266735871632
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,1,128,1,float16,float16,0,0.08182399968306224
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,1,128,1,fp8,fp8,0,0.08609599868456523
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,2,128,1,float16,fp8,0,0.0815413345893224
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,2,128,1,fp8,fp8,0,0.08682133754094441
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,2,128,1,float16,float16,0,0.08179733157157898
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,4,128,1,float16,float16,0,0.08550933003425598
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,4,128,1,float16,fp8,0,0.08558400472005208
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,4,128,1,fp8,fp8,0,0.0937600036462148
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,8,128,1,float16,fp8,0,0.08591999610265096
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,8,128,1,float16,float16,0,0.08561066786448161
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,48,8,128,1,fp8,fp8,0,0.09553600351015727
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,48,128,1,float16,float16,0,0.052239999175071716
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,1,128,1,float16,float16,0,0.04695466657479604
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,1,128,1,float16,fp8,0,0.046911999583244324
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,48,128,1,fp8,fp8,0,0.057536001006762184
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,48,128,1,float16,fp8,0,0.05064000189304352
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,1,128,1,fp8,fp8,0,0.04874666531880697
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,2,128,1,float16,float16,0,0.04701333244641622
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,2,128,1,float16,fp8,0,0.047024001677831016
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,2,128,1,fp8,fp8,0,0.04927466809749603
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,4,128,1,float16,float16,0,0.048800001541773476
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,4,128,1,float16,fp8,0,0.04842133323351542
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,4,128,1,fp8,fp8,0,0.05241066714127859
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,8,128,1,float16,float16,0,0.048570667703946434
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,8,128,1,float16,fp8,0,0.048810665806134544
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,48,8,128,1,fp8,fp8,0,0.05323733389377594
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,48,128,1,float16,float16,0,0.030608000854651134
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,1,128,1,float16,float16,0,0.02926933268706004
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,48,128,1,float16,fp8,0,0.029845332105954487
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,48,128,1,fp8,fp8,0,0.033520000676314034
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,1,128,1,float16,fp8,0,0.029466666281223297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,1,128,1,fp8,fp8,0,0.030693332354227703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,2,128,1,float16,float16,0,0.029616000751654308
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,2,128,1,float16,fp8,0,0.02979733298222224
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,2,128,1,fp8,fp8,0,0.030799999833106995
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,4,128,1,float16,float16,0,0.030165334542592365
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,4,128,1,float16,fp8,0,0.03035733352104823
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,4,128,1,fp8,fp8,0,0.03268799930810928
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,8,128,1,float16,float16,0,0.02998399982849757
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,8,128,1,float16,fp8,0,0.03057066599527995
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,48,8,128,1,fp8,fp8,0,0.03270933280388514
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,48,128,1,float16,float16,0,0.023946667710940044
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,48,128,1,fp8,fp8,0,0.02657066782315572
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,1,128,1,float16,float16,0,0.022730665902296703
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,48,128,1,float16,fp8,0,0.023951999843120575
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,1,128,1,fp8,fp8,0,0.023936000963052113
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,1,128,1,float16,fp8,0,0.022848000129063923
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,2,128,1,float16,fp8,0,0.023024000227451324
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,2,128,1,float16,float16,0,0.022970666488011677
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,2,128,1,fp8,fp8,0,0.024618667860825855
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,4,128,1,float16,fp8,0,0.02363733450571696
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,4,128,1,float16,float16,0,0.02334933231274287
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,8,128,1,float16,float16,0,0.023472001155217487
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,8,128,1,float16,fp8,0,0.02348800003528595
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,4,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,48,8,128,1,fp8,fp8,0,0.025466665625572205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,48,128,1,float16,float16,0,0.017162666966517765
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,48,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,1,128,1,float16,fp8,0,0.01664000004529953
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,48,128,1,fp8,fp8,0,0.01844800015290578
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,1,128,1,fp8,fp8,0,0.017573333034912746
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,1,128,1,float16,float16,0,0.01628799984852473
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,2,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,2,128,1,float16,float16,0,0.01611199975013733
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,2,128,1,fp8,fp8,0,0.01773333301146825
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,4,128,1,float16,float16,0,0.016208000481128693
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,4,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,4,128,1,fp8,fp8,0,0.017765333255132038
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,8,128,1,float16,float16,0,0.016384000579516094
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,8,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,48,8,128,1,fp8,fp8,0,0.017727999637524288
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,48,128,1,float16,fp8,0,0.016224000602960587
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,48,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,1,128,1,float16,float16,0,0.015568000574906668
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,48,128,1,float16,float16,0,0.015429332852363586
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,1,128,1,float16,fp8,0,0.016063999384641647
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,1,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,2,128,1,float16,float16,0,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,2,128,1,fp8,fp8,0,0.016757333030303318
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,2,128,1,float16,fp8,0,0.01611199975013733
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,4,128,1,float16,float16,0,0.015637333194414776
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,4,128,1,float16,fp8,0,0.016143999993801117
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,8,128,1,float16,float16,0,0.015568000574906668
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,8,128,1,float16,fp8,0,0.016362667083740234
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,4,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,48,8,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,1,128,1,float16,float16,0,0.37907199064890545
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,1,128,1,float16,fp8,0,0.380351980527242
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,2,128,1,float16,float16,0,0.38366933663686115
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,1,128,1,fp8,fp8,0,0.45234668254852295
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,4,128,1,float16,float16,0,0.39311468601226807
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,4,128,1,float16,fp8,0,0.39715198675791424
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,2,128,1,float16,fp8,0,0.3828853368759155
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,2,128,1,fp8,fp8,0,0.45922664801279706
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,4,128,1,fp8,fp8,0,0.49037333329518634
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,8,128,1,float16,float16,0,0.39951467514038086
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,1,128,1,float16,float16,0,0.19727466503779092
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,8,128,1,float16,fp8,0,0.40026132265726727
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,48,8,128,1,fp8,fp8,0,0.5007573366165161
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,48,128,1,float16,fp8,0,0.2108746568361918
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,1,128,1,float16,fp8,0,0.197818656762441
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,48,128,1,fp8,fp8,0,0.2595253388086955
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,48,128,1,float16,float16,0,0.21448532740275064
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,1,128,1,fp8,fp8,0,0.23562133312225342
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,2,128,1,float16,float16,0,0.1994453271230062
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,2,128,1,float16,fp8,0,0.19929067293802896
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,2,128,1,fp8,fp8,0,0.23940799633661905
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,4,128,1,float16,float16,0,0.2046453356742859
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,4,128,1,float16,fp8,0,0.20521066586176553
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,4,128,1,fp8,fp8,0,0.25357866287231445
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,8,128,1,float16,float16,0,0.20705066124598184
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,8,128,1,float16,fp8,0,0.20925867557525635
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,48,8,128,1,fp8,fp8,0,0.25819732745488483
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,48,128,1,float16,float16,0,0.11518933375676473
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,48,128,1,float16,fp8,0,0.11248532931009929
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,1,128,1,float16,float16,0,0.10512533783912659
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,1,128,1,float16,fp8,0,0.10480533043543498
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,1,128,1,fp8,fp8,0,0.1269973317782084
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,2,128,1,float16,float16,0,0.10482666889826457
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,2,128,1,float16,fp8,0,0.10470933715502422
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,48,128,1,fp8,fp8,0,0.14076266686121622
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,2,128,1,fp8,fp8,0,0.1285866697629293
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,4,128,1,float16,float16,0,0.10646933317184448
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,4,128,1,float16,fp8,0,0.107205331325531
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,4,128,1,fp8,fp8,0,0.1341386636098226
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,8,128,1,float16,float16,0,0.10791466633478801
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,8,128,1,float16,fp8,0,0.1083626647790273
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,48,8,128,1,fp8,fp8,0,0.1368000010649363
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,48,128,1,float16,float16,0,0.06316799918810527
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,48,128,1,float16,fp8,0,0.06281066437562306
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,1,128,1,float16,fp8,0,0.05798399945100149
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,1,128,1,float16,float16,0,0.05817066629727682
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,48,128,1,fp8,fp8,0,0.07824000219504039
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,1,128,1,fp8,fp8,0,0.06942399839560191
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,2,128,1,float16,float16,0,0.05834133426348368
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,2,128,1,float16,fp8,0,0.05949333310127258
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,2,128,1,fp8,fp8,0,0.07029866675535838
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,4,128,1,float16,float16,0,0.0591786652803421
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,4,128,1,float16,fp8,0,0.05959466596444448
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,8,128,1,float16,float16,0,0.0590826670328776
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,4,128,1,fp8,fp8,0,0.07357866565386455
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,8,128,1,fp8,fp8,0,0.07462933162848155
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,48,8,128,1,float16,fp8,0,0.05997333427270254
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,48,128,1,float16,float16,0,0.03500800083080927
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,48,128,1,float16,fp8,0,0.035162667433420815
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,1,128,1,float16,float16,0,0.03465600063403448
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,48,128,1,fp8,fp8,0,0.04342400034268697
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,1,128,1,float16,fp8,0,0.035018667578697205
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,1,128,1,fp8,fp8,0,0.040709334115187325
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,2,128,1,float16,float16,0,0.035018667578697205
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,2,128,1,float16,fp8,0,0.03480533262093862
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,2,128,1,fp8,fp8,0,0.041482667128245033
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,4,128,1,float16,float16,0,0.035487999518712364
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,4,128,1,float16,fp8,0,0.03551466763019562
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,4,128,1,fp8,fp8,0,0.042949333786964417
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,8,128,1,float16,float16,0,0.03573333223660787
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,8,128,1,float16,fp8,0,0.03573333223660787
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,48,8,128,1,fp8,fp8,0,0.043391997615496315
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,48,128,1,float16,float16,0,0.02481066683928172
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,48,128,1,float16,fp8,0,0.02493866781393687
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,48,128,1,fp8,fp8,0,0.02961066613594691
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,1,128,1,float16,float16,0,0.02370133250951767
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,1,128,1,float16,fp8,0,0.023631999890009563
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,1,128,1,fp8,fp8,0,0.0273333340883255
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,2,128,1,float16,float16,0,0.023872000475724537
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,2,128,1,float16,fp8,0,0.023647998770078022
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,2,128,1,fp8,fp8,0,0.02733866622050603
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,4,128,1,float16,float16,0,0.02436800052722295
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,4,128,1,float16,fp8,0,0.024245334168275196
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,4,128,1,fp8,fp8,0,0.028688001135985058
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,8,128,1,float16,float16,0,0.024432001014550526
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,8,128,1,float16,fp8,0,0.024586667617162068
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,48,8,128,1,fp8,fp8,0,0.028698667883872986
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,48,128,1,float16,float16,0,0.019061333189407986
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,48,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,48,128,1,fp8,fp8,0,0.021754667162895203
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,1,128,1,float16,float16,0,0.018757333358128864
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,1,128,1,float16,fp8,0,0.018245333184798557
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,1,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,2,128,1,float16,float16,0,0.0182239996890227
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,2,128,1,float16,fp8,0,0.01868266612291336
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,2,128,1,fp8,fp8,0,0.021386665602525074
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,4,128,1,float16,float16,0,0.01812800019979477
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,4,128,1,float16,fp8,0,0.01854933301607768
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,4,128,1,fp8,fp8,0,0.021562665700912476
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,8,128,1,float16,float16,0,0.018629333625237148
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,8,128,1,float16,fp8,0,0.018624000251293182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,48,8,128,1,fp8,fp8,0,0.02176533391078313
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,48,128,1,float16,float16,0,0.015370666980743408
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,48,128,1,float16,fp8,0,0.015813333292802174
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,48,128,1,fp8,fp8,0,0.017509333789348602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,1,128,1,float16,float16,0,0.014885333677132925
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,1,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,2,128,1,float16,float16,0,0.014853333433469137
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,1,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,2,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,2,128,1,fp8,fp8,0,0.017386666188637417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,4,128,1,float16,float16,0,0.015381333728631338
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,4,128,1,float16,fp8,0,0.015466666469971338
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,4,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,8,128,1,float16,float16,0,0.015530666957298914
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,8,128,1,float16,fp8,0,0.01591466615597407
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,48,128,1,float16,float16,0,0.014490666488806406
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,48,8,128,1,fp8,fp8,0,0.017562666287024815
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,48,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,48,128,1,fp8,fp8,0,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,1,128,1,float16,float16,0,0.014250667144854864
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,1,128,1,fp8,fp8,0,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,1,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,2,128,1,float16,float16,0,0.01440000037352244
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,2,128,1,float16,fp8,0,0.014736000448465347
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,2,128,1,fp8,fp8,0,0.01664000004529953
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,4,128,1,float16,float16,0,0.0145066666106383
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,4,128,1,float16,fp8,0,0.014885333677132925
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,4,128,1,fp8,fp8,0,0.01639466608564059
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,8,128,1,float16,float16,0,0.014432000617186228
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,8,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,48,8,128,1,fp8,fp8,0,0.01657066618402799
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,1,128,1,float16,float16,0,0.3251466751098633
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,1,128,1,float16,fp8,0,0.3251519997914632
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,1,128,1,fp8,fp8,0,0.3895786603291829
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,2,128,1,float16,float16,0,0.3259519934654236
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,2,128,1,float16,fp8,0,0.3252799908320109
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,2,128,1,fp8,fp8,0,0.39235198497772217
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,4,128,1,float16,float16,0,0.3303146759668986
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,4,128,1,float16,fp8,0,0.3312373359998067
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,4,128,1,fp8,fp8,0,0.40803734461466473
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,8,128,1,float16,float16,0,0.33262399832407635
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,1,128,1,float16,float16,0,0.16834133863449097
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,8,128,1,float16,fp8,0,0.3330826759338379
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,48,128,1,float16,float16,0,0.16512533028920492
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,48,8,128,1,fp8,fp8,0,0.4119679927825928
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,48,128,1,fp8,fp8,0,0.21544533967971802
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,48,128,1,float16,fp8,0,0.16273599863052368
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,1,128,1,float16,fp8,0,0.1678719917933146
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,1,128,1,fp8,fp8,0,0.20406933625539145
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,2,128,1,float16,float16,0,0.1684266726175944
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,2,128,1,float16,fp8,0,0.16881599028905234
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,2,128,1,fp8,fp8,0,0.2050559918085734
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,4,128,1,float16,float16,0,0.1718239982922872
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,4,128,1,float16,fp8,0,0.17141334215799967
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,4,128,1,fp8,fp8,0,0.2116533319155375
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,8,128,1,float16,float16,0,0.17308799425760904
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,8,128,1,float16,fp8,0,0.1732106606165568
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,48,8,128,1,fp8,fp8,0,0.21307732661565146
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,1,128,1,float16,float16,0,0.09026133020718892
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,48,128,1,float16,fp8,0,0.08673066894213359
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,48,128,1,float16,float16,0,0.08886399865150452
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,48,128,1,fp8,fp8,0,0.11674666404724121
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,1,128,1,float16,fp8,0,0.09106666843096416
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,1,128,1,fp8,fp8,0,0.10980266332626343
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,2,128,1,float16,float16,0,0.09055999914805095
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,2,128,1,float16,fp8,0,0.09071466326713562
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,2,128,1,fp8,fp8,0,0.10989866654078166
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,4,128,1,float16,float16,0,0.09154133001963298
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,4,128,1,fp8,fp8,0,0.11358400185902913
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,4,128,1,float16,fp8,0,0.09292266766230266
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,8,128,1,float16,float16,0,0.09181333581606548
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,8,128,1,float16,fp8,0,0.09311466415723164
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,48,8,128,1,fp8,fp8,0,0.11429333686828613
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,48,128,1,float16,float16,0,0.047584002216657005
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,48,128,1,float16,fp8,0,0.04797866443792979
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,48,128,1,fp8,fp8,0,0.06294399996598561
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,1,128,1,float16,fp8,0,0.05211733281612396
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,1,128,1,float16,float16,0,0.05212266743183136
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,1,128,1,fp8,fp8,0,0.06166933476924896
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,2,128,1,float16,float16,0,0.05199466645717621
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,2,128,1,float16,fp8,0,0.05208000044027964
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,4,128,1,float16,float16,0,0.05292266607284546
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,2,128,1,fp8,fp8,0,0.06243733565012614
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,4,128,1,float16,fp8,0,0.05295999844868978
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,4,128,1,fp8,fp8,0,0.06414933502674103
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,8,128,1,float16,float16,0,0.052613332867622375
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,8,128,1,float16,fp8,0,0.05312533179918925
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,48,8,128,1,fp8,fp8,0,0.06437333424886067
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,48,128,1,float16,float16,0,0.03173866619666418
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,1,128,1,float16,float16,0,0.032111999889214836
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,48,128,1,float16,fp8,0,0.03175999969244003
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,1,128,1,float16,fp8,0,0.032314665615558624
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,48,128,1,fp8,fp8,0,0.03920533259709676
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,1,128,1,fp8,fp8,0,0.03833066672086716
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,2,128,1,float16,float16,0,0.032458665470282234
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,2,128,1,float16,fp8,0,0.032672000428040825
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,2,128,1,fp8,fp8,0,0.03833066672086716
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,4,128,1,float16,float16,0,0.03266666581233343
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,4,128,1,float16,fp8,0,0.03299733251333237
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,4,128,1,fp8,fp8,0,0.039306665460268654
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,8,128,1,float16,fp8,0,0.03291733314593633
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,8,128,1,float16,float16,0,0.03313066562016805
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,48,8,128,1,fp8,fp8,0,0.03921599934498469
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,48,128,1,float16,float16,0,0.021456000705560047
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,48,128,1,float16,fp8,0,0.021776000658671062
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,48,128,1,fp8,fp8,0,0.02554133286078771
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,1,128,1,float16,float16,0,0.02128000060717265
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,1,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,1,128,1,fp8,fp8,0,0.02513066679239273
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,2,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,2,128,1,float16,float16,0,0.02109333376089732
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,2,128,1,fp8,fp8,0,0.02515733242034912
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,4,128,1,float16,float16,0,0.020810666183630627
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,4,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,4,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,8,128,1,float16,float16,0,0.021055998901526134
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,8,128,1,float16,fp8,0,0.02146666745344798
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,48,8,128,1,fp8,fp8,0,0.025648000339667004
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,48,128,1,float16,float16,0,0.017829333742459614
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,48,128,1,float16,fp8,0,0.01807466646035512
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,48,128,1,fp8,fp8,0,0.021018666525681812
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,1,128,1,float16,float16,0,0.017488000293572743
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,1,128,1,float16,fp8,0,0.017909333109855652
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,1,128,1,fp8,fp8,0,0.020768000433842342
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,2,128,1,float16,float16,0,0.017701332767804463
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,2,128,1,float16,fp8,0,0.018405333161354065
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,2,128,1,fp8,fp8,0,0.02096533278624217
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,4,128,1,float16,float16,0,0.01783466711640358
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,4,128,1,float16,fp8,0,0.017984000345071156
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,4,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,8,128,1,float16,float16,0,0.01785600061217944
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,8,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,48,8,128,1,float16,fp8,0,0.01846933364868164
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,48,128,1,float16,float16,0,0.01471466695268949
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,48,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,48,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,1,128,1,float16,float16,0,0.014352000008026758
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,1,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,1,128,1,fp8,fp8,0,0.016778666526079178
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,2,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,2,128,1,float16,float16,0,0.01452800010641416
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,2,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,4,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,4,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,4,128,1,float16,float16,0,0.014938666174809137
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,8,128,1,float16,float16,0,0.01440000037352244
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,8,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,48,8,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,48,128,1,float16,float16,0,0.014149333039919535
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,48,128,1,float16,fp8,0,0.014682666709025701
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,1,128,1,float16,float16,0,0.013936000565687815
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,48,128,1,fp8,fp8,0,0.01616000011563301
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,1,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,2,128,1,float16,float16,0,0.014117332796255747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,1,128,1,fp8,fp8,0,0.01613866661985715
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,2,128,1,float16,fp8,0,0.014773332824309668
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,2,128,1,fp8,fp8,0,0.016362667083740234
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,4,128,1,float16,float16,0,0.01404800017674764
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,4,128,1,float16,fp8,0,0.014730667074521383
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,4,128,1,fp8,fp8,0,0.016352000335852306
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,8,128,1,float16,float16,0,0.01423466702302297
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,8,128,1,float16,fp8,0,0.014831999937693277
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,48,8,128,1,fp8,fp8,0,0.016623999923467636
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,2,128,1,fp8,fp8,0,26.65912119547526
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,1,128,1,float16,float16,0,41.47552998860677
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,1,128,1,fp8,fp8,0,26.760294596354168
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,2,128,1,float16,fp8,0,41.12359364827474
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,2,128,1,float16,float16,0,40.54254404703776
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,1,128,1,float16,fp8,0,41.014801025390625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,4,128,1,float16,fp8,0,41.38520050048828
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,4,128,1,float16,float16,0,41.30826059977213
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,1,128,1,float16,float16,0,20.601317087809246
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,4,128,1,fp8,fp8,0,26.784998575846355
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,8,128,1,float16,float16,0,41.44910430908203
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,1,128,1,float16,fp8,0,20.691333770751953
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,8,128,1,float16,fp8,0,40.92939758300781
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,40,128,1,float16,float16,0,19.846820831298828
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,40,8,128,1,fp8,fp8,0,27.05980682373047
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,1,128,1,fp8,fp8,0,13.499883015950521
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,40,128,1,fp8,fp8,0,13.701583862304688
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,40,128,1,float16,fp8,0,20.31167984008789
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,2,128,1,float16,float16,0,20.906992594401043
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,2,128,1,fp8,fp8,0,13.612730662027994
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,2,128,1,float16,fp8,0,20.60424550374349
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,4,128,1,float16,float16,0,20.399205525716145
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,4,128,1,float16,fp8,0,20.629627227783203
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,4,128,1,fp8,fp8,0,13.50067138671875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,8,128,1,float16,float16,0,20.969946543375652
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,1,128,1,float16,float16,0,10.532981236775717
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,8,128,1,fp8,fp8,0,13.718949635823568
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,1,128,1,float16,fp8,0,10.363093058268229
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,40,128,1,float16,fp8,0,10.289728164672852
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,40,128,1,fp8,fp8,0,6.70199457804362
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,40,8,128,1,float16,fp8,0,20.59765879313151
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,40,128,1,float16,float16,0,10.305050532023111
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,1,128,1,fp8,fp8,0,6.766784032185872
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,2,128,1,float16,float16,0,10.552314758300781
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,2,128,1,float16,fp8,0,10.464687983194986
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,2,128,1,fp8,fp8,0,6.715898513793945
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,4,128,1,float16,float16,0,10.453744252522787
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,4,128,1,float16,fp8,0,10.254666646321615
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,4,128,1,fp8,fp8,0,6.755317052205403
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,8,128,1,fp8,fp8,0,6.8030134836832685
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,8,128,1,float16,float16,0,10.509509404500326
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,40,8,128,1,float16,fp8,0,10.592698415120443
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,1,128,1,float16,float16,0,5.189706802368164
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,40,128,1,float16,float16,0,5.203850746154785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,40,128,1,float16,fp8,0,5.196559906005859
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,1,128,1,fp8,fp8,0,3.4209973017374673
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,2,128,1,float16,float16,0,5.052090644836426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,1,128,1,float16,fp8,0,5.3259627024332685
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,2,128,1,float16,fp8,0,5.26853879292806
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,2,128,1,fp8,fp8,0,3.4354934692382812
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,40,128,1,fp8,fp8,0,3.3720534642537436
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,4,128,1,float16,float16,0,5.322330792744954
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,4,128,1,fp8,fp8,0,3.475125312805176
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,4,128,1,float16,fp8,0,5.215717315673828
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,8,128,1,float16,float16,0,5.297936121622722
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,8,128,1,float16,fp8,0,5.302602767944336
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,40,8,128,1,fp8,fp8,0,3.4305760065714517
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,1,128,1,float16,float16,0,23.564613342285156
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,1,128,1,float16,fp8,0,23.733983357747395
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,1,128,1,fp8,fp8,0,15.798021952311197
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,2,128,1,float16,float16,0,24.024559020996094
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,2,128,1,float16,fp8,0,23.703440348307293
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,2,128,1,fp8,fp8,0,15.684815724690756
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,4,128,1,float16,float16,0,23.81110382080078
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,4,128,1,float16,fp8,0,24.129552205403645
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,4,128,1,fp8,fp8,0,15.595947265625
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,1,128,1,float16,float16,0,12.127583821614584
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,8,128,1,float16,float16,0,23.88013966878255
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,8,128,1,fp8,fp8,0,15.936634063720703
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,40,128,1,float16,float16,0,11.798431396484375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,40,8,128,1,float16,fp8,0,23.904070536295574
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,1,128,1,float16,fp8,0,11.777381896972656
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,1,128,1,fp8,fp8,0,7.929434458414714
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,40,128,1,float16,fp8,0,11.849989573160807
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,40,128,1,fp8,fp8,0,7.976512273152669
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,2,128,1,float16,float16,0,11.940266927083334
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,2,128,1,fp8,fp8,0,7.792144139607747
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,2,128,1,float16,fp8,0,11.905887603759766
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,4,128,1,float16,float16,0,12.10806401570638
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,4,128,1,float16,fp8,0,11.859840393066406
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,4,128,1,fp8,fp8,0,7.961429595947266
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,8,128,1,float16,float16,0,12.051860809326172
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,8,128,1,float16,fp8,0,12.027851104736328
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,1,128,1,float16,float16,0,5.928768157958984
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,1,128,1,float16,fp8,0,6.140346527099609
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,40,8,128,1,fp8,fp8,0,7.922768274943034
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,1,128,1,fp8,fp8,0,3.913930575052897
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,2,128,1,float16,float16,0,6.09556770324707
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,40,128,1,float16,fp8,0,5.758405049641927
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,40,128,1,float16,float16,0,5.9045759836832685
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,2,128,1,float16,fp8,0,5.994639714558919
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,2,128,1,fp8,fp8,0,4.014944076538086
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,40,128,1,fp8,fp8,0,4.022831916809082
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,4,128,1,float16,float16,0,5.854586919148763
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,4,128,1,float16,fp8,0,5.896917343139648
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,4,128,1,fp8,fp8,0,3.9560960133870444
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,8,128,1,float16,float16,0,6.051104227701823
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,1,128,1,float16,float16,0,2.999663988749186
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,8,128,1,float16,fp8,0,5.803813298543294
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,40,8,128,1,fp8,fp8,0,3.943888028462728
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,40,128,1,float16,float16,0,2.947711944580078
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,40,128,1,float16,fp8,0,2.9522988001505532
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,40,128,1,fp8,fp8,0,2.058501402537028
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,1,128,1,float16,fp8,0,3.0354026158650718
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,1,128,1,fp8,fp8,0,2.069690704345703
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,2,128,1,float16,float16,0,3.0014241536458335
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,2,128,1,float16,fp8,0,2.9613866806030273
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,2,128,1,fp8,fp8,0,2.0613706906636557
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,4,128,1,float16,float16,0,3.0552425384521484
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,4,128,1,float16,fp8,0,3.0057385762532554
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,4,128,1,fp8,fp8,0,2.0723840395609536
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,8,128,1,float16,float16,0,3.032714525858561
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,8,128,1,float16,fp8,0,3.0413118998209634
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,40,8,128,1,fp8,fp8,0,2.0793174107869468
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,1,128,1,float16,float16,0,16.883647918701172
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,1,128,1,fp8,fp8,0,11.431413014729818
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,1,128,1,float16,fp8,0,16.576661427815754
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,2,128,1,float16,float16,0,17.023173014322918
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,2,128,1,float16,fp8,0,16.77395248413086
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,2,128,1,fp8,fp8,0,11.210559844970703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,4,128,1,float16,fp8,0,16.840133666992188
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,4,128,1,float16,float16,0,17.052932739257812
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,1,128,1,float16,float16,0,8.490400314331055
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,4,128,1,fp8,fp8,0,11.25323740641276
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,8,128,1,fp8,fp8,0,11.41756820678711
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,8,128,1,float16,fp8,0,16.685211181640625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,40,8,128,1,float16,float16,0,17.254736582438152
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,40,128,1,float16,float16,0,8.472607930501303
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,40,128,1,float16,fp8,0,8.507189432779947
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,1,128,1,float16,fp8,0,8.55890146891276
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,40,128,1,fp8,fp8,0,5.71177609761556
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,1,128,1,fp8,fp8,0,5.428880055745442
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,2,128,1,float16,float16,0,8.549082438151041
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,2,128,1,float16,fp8,0,8.278026580810547
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,2,128,1,fp8,fp8,0,5.585589090983073
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,4,128,1,float16,float16,0,8.503765106201172
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,4,128,1,fp8,fp8,0,5.67296028137207
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,4,128,1,float16,fp8,0,8.40660285949707
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,8,128,1,float16,float16,0,8.57857577006022
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,8,128,1,float16,fp8,0,8.446943918863932
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,1,128,1,float16,fp8,0,4.256954511006673
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,1,128,1,float16,float16,0,4.115008036295573
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,40,8,128,1,fp8,fp8,0,5.6466935475667315
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,1,128,1,fp8,fp8,0,2.789072036743164
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,2,128,1,float16,float16,0,4.065130551656087
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,40,128,1,float16,float16,0,4.158624013264974
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,40,128,1,float16,fp8,0,4.19763724009196
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,40,128,1,fp8,fp8,0,2.8153225580851235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,2,128,1,fp8,fp8,0,2.7994559605916343
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,2,128,1,float16,fp8,0,4.125818570454915
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,4,128,1,float16,fp8,0,4.040970802307129
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,4,128,1,float16,float16,0,4.2045332590738935
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,4,128,1,fp8,fp8,0,2.7932427724202475
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,8,128,1,float16,float16,0,4.229680061340332
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,8,128,1,float16,fp8,0,4.060085296630859
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,40,8,128,1,fp8,fp8,0,2.8064425786336265
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,1,128,1,float16,float16,0,2.18175999323527
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,1,128,1,float16,fp8,0,2.1364320119222007
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,40,128,1,float16,float16,0,2.1516000429789224
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,1,128,1,fp8,fp8,0,1.5149119695027669
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,40,128,1,float16,fp8,0,2.1465919812520347
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,2,128,1,float16,float16,0,2.1751200358072915
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,2,128,1,float16,fp8,0,2.1382932662963867
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,2,128,1,fp8,fp8,0,1.5234559377034504
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,40,128,1,fp8,fp8,0,1.525525410970052
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,4,128,1,float16,float16,0,2.165599981943766
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,4,128,1,float16,fp8,0,2.1588160196940103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,4,128,1,fp8,fp8,0,1.5198346773783367
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,8,128,1,float16,float16,0,2.1936000188191733
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,8,128,1,float16,fp8,0,2.1528852780659995
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,40,8,128,1,fp8,fp8,0,1.527664025624593
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,1,128,1,fp8,fp8,0,14.944058736165365
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,1,128,1,float16,fp8,0,22.186912536621094
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,1,128,1,float16,float16,0,22.33643849690755
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,2,128,1,float16,fp8,0,21.874982198079426
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,2,128,1,fp8,fp8,0,14.972000122070312
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,2,128,1,float16,float16,0,22.36925760904948
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,4,128,1,float16,fp8,0,22.307993570963543
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,4,128,1,float16,float16,0,22.654027303059895
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,1,128,1,float16,float16,0,11.471968332926432
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,4,128,1,fp8,fp8,0,14.945786794026693
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,8,128,1,float16,float16,0,22.420794169108074
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,8,128,1,fp8,fp8,0,14.889530181884766
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,40,128,1,float16,float16,0,11.177605946858725
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,1,128,1,float16,fp8,0,11.220757802327475
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,40,8,128,1,float16,fp8,0,22.4989013671875
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,40,128,1,float16,fp8,0,11.074143727620443
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,40,128,1,fp8,fp8,0,7.739722569783528
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,1,128,1,fp8,fp8,0,7.261973063151042
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,2,128,1,float16,float16,0,11.125738779703775
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,2,128,1,float16,fp8,0,11.004442850748697
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,2,128,1,fp8,fp8,0,7.516725540161133
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,4,128,1,float16,fp8,0,11.26205317179362
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,4,128,1,float16,float16,0,11.281402587890625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,4,128,1,fp8,fp8,0,7.4947255452473955
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,8,128,1,float16,float16,0,11.275941212972006
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,8,128,1,float16,fp8,0,11.181524912516275
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,1,128,1,float16,float16,0,5.575455983479817
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,1,128,1,float16,fp8,0,5.420064290364583
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,1,128,1,fp8,fp8,0,3.6050186157226562
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,40,8,128,1,fp8,fp8,0,7.551066716512044
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,40,128,1,float16,float16,0,5.194623947143555
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,2,128,1,float16,float16,0,5.390298843383789
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,40,128,1,float16,fp8,0,5.521200180053711
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,2,128,1,fp8,fp8,0,3.676032066345215
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,2,128,1,float16,fp8,0,5.319205284118652
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,40,128,1,fp8,fp8,0,3.8463465372721353
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,4,128,1,float16,fp8,0,5.535125096638997
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,4,128,1,float16,float16,0,5.600165049235026
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,4,128,1,fp8,fp8,0,3.6632798512776694
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,8,128,1,float16,float16,0,5.483072280883789
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,8,128,1,fp8,fp8,0,3.738719940185547
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,1,128,1,float16,float16,0,2.7441279093424478
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,40,8,128,1,float16,fp8,0,5.340272267659505
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,40,128,1,float16,float16,0,2.7154452006022134
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,40,128,1,float16,fp8,0,2.7581278483072915
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,1,128,1,float16,fp8,0,2.6755574544270835
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,1,128,1,fp8,fp8,0,1.9022132555643718
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,2,128,1,float16,float16,0,2.763813336690267
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,2,128,1,fp8,fp8,0,1.9136373202006023
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,2,128,1,float16,fp8,0,2.7231359481811523
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,40,128,1,fp8,fp8,0,1.9466773668924968
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,4,128,1,float16,float16,0,2.738037427266439
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,4,128,1,fp8,fp8,0,1.9046506881713867
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,4,128,1,float16,fp8,0,2.7225173314412436
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,1,128,1,float16,fp8,0,1.4595840771993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,1,128,1,float16,float16,0,1.4627572695414226
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,8,128,1,float16,fp8,0,2.7483625411987305
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,8,128,1,float16,float16,0,2.748933474222819
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,40,8,128,1,fp8,fp8,0,1.9296587308247883
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,40,128,1,fp8,fp8,0,1.0635573069254558
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,40,128,1,float16,fp8,0,1.445365269978841
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,40,128,1,float16,float16,0,1.448970635732015
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,1,128,1,fp8,fp8,0,1.056938648223877
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,2,128,1,float16,float16,0,1.4650452931722004
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,2,128,1,float16,fp8,0,1.4602079391479492
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,2,128,1,fp8,fp8,0,1.0537066459655762
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,4,128,1,float16,float16,0,1.4871840476989746
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,4,128,1,fp8,fp8,0,1.0594186782836914
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,4,128,1,float16,fp8,0,1.4556105931599934
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,8,128,1,float16,float16,0,1.4736372629801433
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,8,128,1,float16,fp8,0,1.4682025909423828
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,40,8,128,1,fp8,fp8,0,1.0598133405049641
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,1,128,1,float16,float16,0,13.187936147054037
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,1,128,1,fp8,fp8,0,8.99127451578776
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,1,128,1,float16,fp8,0,13.032335917154947
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,2,128,1,float16,float16,0,13.133200327555338
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,2,128,1,float16,fp8,0,13.246543884277344
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,2,128,1,fp8,fp8,0,8.90943972269694
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,4,128,1,float16,fp8,0,12.996986389160156
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,4,128,1,float16,float16,0,13.275680541992188
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,1,128,1,float16,float16,0,6.397167841593425
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,4,128,1,fp8,fp8,0,8.88908831278483
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,8,128,1,float16,fp8,0,13.052698771158854
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,8,128,1,float16,float16,0,13.297215779622396
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,1,128,1,float16,fp8,0,6.639882405598958
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,40,8,128,1,fp8,fp8,0,9.025050481160482
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,40,128,1,float16,float16,0,6.71565310160319
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,40,128,1,fp8,fp8,0,4.576250712076823
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,1,128,1,fp8,fp8,0,4.32091744740804
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,40,128,1,float16,fp8,0,6.583695729573567
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,2,128,1,float16,float16,0,6.503936131795247
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,2,128,1,float16,fp8,0,6.550917307535808
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,2,128,1,fp8,fp8,0,4.474069277445476
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,4,128,1,float16,float16,0,6.418448130289714
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,4,128,1,float16,fp8,0,6.587472279866536
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,4,128,1,fp8,fp8,0,4.396106719970703
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,1,128,1,float16,float16,0,3.149653434753418
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,8,128,1,float16,float16,0,6.642853418986003
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,8,128,1,float16,fp8,0,6.323317209879558
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,1,128,1,float16,fp8,0,3.082261403401693
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,40,8,128,1,fp8,fp8,0,4.405221303304036
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,40,128,1,float16,float16,0,3.1954345703125
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,40,128,1,float16,fp8,0,3.211306571960449
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,40,128,1,fp8,fp8,0,2.321594715118408
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,1,128,1,fp8,fp8,0,2.213674704233805
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,2,128,1,float16,fp8,0,3.1418720881144204
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,2,128,1,float16,float16,0,3.190357208251953
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,2,128,1,fp8,fp8,0,2.2149759928385415
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,4,128,1,float16,float16,0,3.2035627365112305
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,4,128,1,float16,fp8,0,3.1704320907592773
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,4,128,1,fp8,fp8,0,2.2381919225056968
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,8,128,1,float16,float16,0,3.209184010823568
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,8,128,1,float16,fp8,0,3.234853426615397
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,40,8,128,1,fp8,fp8,0,2.244389375050863
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,1,128,1,float16,fp8,0,1.6358985900878906
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,1,128,1,float16,float16,0,1.6635200182596843
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,40,128,1,float16,fp8,0,1.6268266042073567
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,40,128,1,float16,float16,0,1.664650599161784
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,40,128,1,fp8,fp8,0,1.2200106779734294
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,1,128,1,fp8,fp8,0,1.1871840159098308
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,2,128,1,float16,float16,0,1.6553653081258137
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,2,128,1,float16,fp8,0,1.6258400281270344
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,2,128,1,fp8,fp8,0,1.1869226296742756
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,4,128,1,float16,float16,0,1.6695040067036946
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,4,128,1,float16,fp8,0,1.6242027282714844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,4,128,1,fp8,fp8,0,1.1888586680094402
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,8,128,1,float16,float16,0,1.6580212910970051
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,8,128,1,float16,fp8,0,1.642149289449056
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,1,128,1,float16,float16,0,0.9077920118967692
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,1,128,1,float16,fp8,0,0.8972000281016032
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,40,128,1,float16,float16,0,0.9007306893666586
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,40,128,1,float16,fp8,0,0.9020053545633951
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,40,8,128,1,fp8,fp8,0,1.195039987564087
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,1,128,1,fp8,fp8,0,0.6420373519261678
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,40,128,1,fp8,fp8,0,0.6804906527201334
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,2,128,1,float16,float16,0,0.915770689646403
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,2,128,1,float16,fp8,0,0.8950613339742025
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,2,128,1,fp8,fp8,0,0.6432160139083862
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,4,128,1,float16,float16,0,0.9122400283813477
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,4,128,1,float16,fp8,0,0.902511994043986
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,4,128,1,fp8,fp8,0,0.6448800166447958
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,8,128,1,float16,float16,0,0.912874698638916
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,8,128,1,float16,fp8,0,0.9060320059458414
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,40,8,128,1,fp8,fp8,0,0.6504960060119629
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,1,128,1,float16,float16,0,12.691183725992838
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,1,128,1,fp8,fp8,0,8.897600173950195
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,2,128,1,float16,float16,0,12.921914418538412
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,2,128,1,fp8,fp8,0,8.910240173339844
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,1,128,1,float16,fp8,0,12.789578755696615
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,2,128,1,float16,fp8,0,12.806037902832031
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,4,128,1,float16,fp8,0,12.631813049316406
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,4,128,1,float16,float16,0,13.019023895263672
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,1,128,1,float16,float16,0,6.4542185465494795
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,4,128,1,fp8,fp8,0,8.88970692952474
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,1,128,1,float16,fp8,0,6.2312266031901045
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,8,128,1,fp8,fp8,0,9.038527806599935
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,8,128,1,float16,float16,0,12.920581817626953
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,40,8,128,1,float16,fp8,0,12.851056416829428
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,40,128,1,float16,fp8,0,6.535743713378906
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,40,128,1,fp8,fp8,0,4.638181368509929
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,40,128,1,float16,float16,0,6.470437367757161
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,1,128,1,fp8,fp8,0,4.327802658081055
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,2,128,1,float16,fp8,0,6.2428639729817705
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,2,128,1,fp8,fp8,0,4.341610590616862
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,2,128,1,float16,float16,0,6.242448170979817
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,4,128,1,float16,float16,0,6.4188798268636065
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,4,128,1,float16,fp8,0,6.342437108357747
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,4,128,1,fp8,fp8,0,4.436922709147136
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,8,128,1,float16,fp8,0,6.050517400105794
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,8,128,1,float16,float16,0,6.282314936319987
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,1,128,1,float16,float16,0,3.0918505986531577
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,1,128,1,float16,fp8,0,2.9878133138020835
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,40,8,128,1,fp8,fp8,0,4.471301396687825
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,1,128,1,fp8,fp8,0,2.1938613255818686
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,40,128,1,float16,float16,0,3.1320905685424805
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,40,128,1,float16,fp8,0,3.1672798792521157
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,2,128,1,float16,float16,0,3.05405330657959
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,40,128,1,fp8,fp8,0,2.3622026443481445
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,2,128,1,float16,fp8,0,2.9927892684936523
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,2,128,1,fp8,fp8,0,2.205296039581299
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,4,128,1,float16,float16,0,3.0401652654012046
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,4,128,1,fp8,fp8,0,2.211594740549723
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,4,128,1,float16,fp8,0,2.977498690287272
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,8,128,1,float16,float16,0,3.108330726623535
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,8,128,1,fp8,fp8,0,2.246074676513672
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,40,8,128,1,float16,fp8,0,3.0382560094197593
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,1,128,1,float16,float16,0,1.5662186940511067
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,1,128,1,float16,fp8,0,1.5382827123006184
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,40,128,1,float16,float16,0,1.5980745951334636
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,40,128,1,float16,fp8,0,1.5881813367207844
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,1,128,1,fp8,fp8,0,1.155242681503296
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,2,128,1,float16,float16,0,1.5824799537658691
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,2,128,1,float16,fp8,0,1.5559253692626953
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,40,128,1,fp8,fp8,0,1.2302292982737224
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,2,128,1,fp8,fp8,0,1.1565386454264324
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,4,128,1,float16,float16,0,1.5692906379699707
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,4,128,1,float16,fp8,0,1.5525706609090169
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,4,128,1,fp8,fp8,0,1.1613759994506836
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,8,128,1,float16,fp8,0,1.5706027348836262
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,8,128,1,fp8,fp8,0,1.1749227046966553
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,40,8,128,1,float16,float16,0,1.5974453290303547
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,1,128,1,float16,float16,0,0.8418933550516764
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,1,128,1,float16,fp8,0,0.8271733125050863
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,1,128,1,fp8,fp8,0,0.6332373221715292
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,40,128,1,float16,float16,0,0.8489973545074463
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,40,128,1,float16,fp8,0,0.8409813245137533
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,2,128,1,float16,float16,0,0.8471466700236002
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,40,128,1,fp8,fp8,0,0.6602986653645834
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,2,128,1,float16,fp8,0,0.832085371017456
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,2,128,1,fp8,fp8,0,0.6307466824849447
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,4,128,1,float16,float16,0,0.8503519694010416
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,4,128,1,float16,fp8,0,0.835040012995402
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,4,128,1,fp8,fp8,0,0.6352693239847819
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,8,128,1,float16,float16,0,0.8467413584391276
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,8,128,1,float16,fp8,0,0.8372800350189209
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,40,8,128,1,fp8,fp8,0,0.6392639875411987
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,40,128,1,float16,float16,0,0.48365334669748944
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,40,128,1,fp8,fp8,0,0.37240533034006756
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,40,128,1,float16,fp8,0,0.4750239849090576
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,1,128,1,float16,float16,0,0.4846026500066121
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,1,128,1,float16,fp8,0,0.47202666600545246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,1,128,1,fp8,fp8,0,0.3507680098215739
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,2,128,1,float16,float16,0,0.4838026762008667
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,2,128,1,float16,fp8,0,0.47222399711608887
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,4,128,1,float16,float16,0,0.48655466238657635
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,4,128,1,float16,fp8,0,0.47362665335337323
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,2,128,1,fp8,fp8,0,0.3524693250656128
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,4,128,1,fp8,fp8,0,0.35470398267110187
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,8,128,1,float16,float16,0,0.4843360185623169
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,8,128,1,float16,fp8,0,0.4747146765391032
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,40,8,128,1,fp8,fp8,0,0.3564586639404297
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,1,128,1,float16,fp8,0,7.634021123250325
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,1,128,1,fp8,fp8,0,5.6191361745198565
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,2,128,1,float16,float16,0,7.725370407104492
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,2,128,1,fp8,fp8,0,5.46882692972819
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,2,128,1,float16,fp8,0,7.735104242960612
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,1,128,1,float16,float16,0,7.696959813435872
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,4,128,1,float16,float16,0,7.851888020833333
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,4,128,1,float16,fp8,0,7.826010386149089
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,1,128,1,float16,float16,0,3.707162539164225
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,4,128,1,fp8,fp8,0,5.622320175170898
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,8,128,1,fp8,fp8,0,5.6019948323567705
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,8,128,1,float16,float16,0,7.974079767862956
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,1,128,1,float16,fp8,0,3.6346613566080728
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,40,8,128,1,float16,fp8,0,7.691802978515625
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,40,128,1,float16,float16,0,3.954773267110189
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,40,128,1,float16,fp8,0,3.76091734568278
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,40,128,1,fp8,fp8,0,2.9709599812825522
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,1,128,1,fp8,fp8,0,2.7419894536336265
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,2,128,1,float16,float16,0,3.7838398615519204
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,2,128,1,float16,fp8,0,3.7827892303466797
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,2,128,1,fp8,fp8,0,2.759728113810221
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,4,128,1,float16,float16,0,3.735504150390625
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,4,128,1,fp8,fp8,0,2.766576131184896
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,4,128,1,float16,fp8,0,3.6614131927490234
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,8,128,1,float16,float16,0,3.7201919555664062
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,8,128,1,float16,fp8,0,3.666682561238607
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,1,128,1,float16,float16,0,1.8733760515848796
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,1,128,1,float16,fp8,0,1.853999932607015
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,1,128,1,fp8,fp8,0,1.4092906316121419
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,40,8,128,1,fp8,fp8,0,2.8039573033650718
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,2,128,1,float16,float16,0,1.8866186141967773
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,40,128,1,float16,fp8,0,1.9389066696166992
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,2,128,1,float16,fp8,0,1.8414026896158855
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,40,128,1,float16,float16,0,1.9407359759012859
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,40,128,1,fp8,fp8,0,1.5390933354695637
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,2,128,1,fp8,fp8,0,1.4143253962198894
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,4,128,1,float16,float16,0,1.8837439219156902
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,4,128,1,float16,fp8,0,1.8524586359659831
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,4,128,1,fp8,fp8,0,1.4267306327819824
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,1,128,1,float16,float16,0,0.9830559889475504
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,8,128,1,float16,fp8,0,1.8565173149108887
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,8,128,1,fp8,fp8,0,1.4358506202697754
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,40,8,128,1,float16,float16,0,1.9064586957295735
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,40,128,1,float16,float16,0,1.0178293387095134
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,40,128,1,float16,fp8,0,1.0102612972259521
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,1,128,1,float16,fp8,0,0.9644906520843506
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,1,128,1,fp8,fp8,0,0.7460587024688721
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,40,128,1,fp8,fp8,0,0.8081386884053549
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,2,128,1,float16,float16,0,0.9862666924794515
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,2,128,1,float16,fp8,0,0.965552012125651
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,2,128,1,fp8,fp8,0,0.7500693003336588
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,4,128,1,float16,float16,0,0.993834654490153
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,4,128,1,float16,fp8,0,0.9773493607838949
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,4,128,1,fp8,fp8,0,0.7566880385080973
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,8,128,1,float16,float16,0,0.9926719665527344
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,8,128,1,float16,fp8,0,0.9745120207468668
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,40,8,128,1,fp8,fp8,0,0.7618079980214437
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,1,128,1,float16,float16,0,0.5423306624094645
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,40,128,1,float16,fp8,0,0.5426986614863077
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,40,128,1,float16,float16,0,0.5449119806289673
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,1,128,1,float16,fp8,0,0.5290826559066772
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,40,128,1,fp8,fp8,0,0.4503893454869588
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,1,128,1,fp8,fp8,0,0.401583989461263
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,2,128,1,float16,float16,0,0.5411413510640463
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,2,128,1,float16,fp8,0,0.5276266733805338
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,2,128,1,fp8,fp8,0,0.40425066153208417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,4,128,1,float16,float16,0,0.543018658955892
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,4,128,1,float16,fp8,0,0.527509331703186
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,4,128,1,fp8,fp8,0,0.4071679910024007
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,8,128,1,float16,float16,0,0.5455199877421061
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,8,128,1,float16,fp8,0,0.5323839982350668
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,1,128,1,float16,float16,0,0.29234133164087933
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,40,8,128,1,fp8,fp8,0,0.4087680180867513
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,1,128,1,float16,fp8,0,0.2868853410085042
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,40,128,1,float16,float16,0,0.3009120027224223
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,40,128,1,float16,fp8,0,0.2999946673711141
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,40,128,1,fp8,fp8,0,0.25275200605392456
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,2,128,1,float16,float16,0,0.2935146689414978
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,1,128,1,fp8,fp8,0,0.23817066351572672
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,2,128,1,fp8,fp8,0,0.23663999636967978
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,2,128,1,float16,fp8,0,0.2844906648000081
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,4,128,1,float16,float16,0,0.29516265789667767
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,4,128,1,float16,fp8,0,0.28614399830500287
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,4,128,1,fp8,fp8,0,0.23863999048868814
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,8,128,1,float16,float16,0,0.2956373294194539
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,8,128,1,float16,fp8,0,0.287717342376709
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,40,8,128,1,fp8,fp8,0,0.240831991036733
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,1,128,1,float16,float16,0,8.01917839050293
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,1,128,1,fp8,fp8,0,6.0138295491536455
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,1,128,1,float16,fp8,0,8.028725306193033
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,2,128,1,fp8,fp8,0,6.029189427693685
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,2,128,1,float16,float16,0,7.886304219563802
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,4,128,1,float16,float16,0,8.015034357706705
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,2,128,1,float16,fp8,0,7.8884531656901045
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,4,128,1,float16,fp8,0,7.835941314697266
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,1,128,1,float16,float16,0,3.8716214497884116
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,4,128,1,fp8,fp8,0,6.040117263793945
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,1,128,1,float16,fp8,0,3.7674878438313804
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,8,128,1,float16,float16,0,8.121162414550781
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,8,128,1,float16,fp8,0,8.026906967163086
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,40,128,1,float16,float16,0,4.072426795959473
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,40,8,128,1,fp8,fp8,0,6.124154408772786
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,40,128,1,float16,fp8,0,3.962688128153483
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,40,128,1,fp8,fp8,0,3.3194665908813477
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,1,128,1,fp8,fp8,0,2.9915199279785156
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,2,128,1,fp8,fp8,0,2.9982827504475913
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,2,128,1,float16,float16,0,3.895888010660807
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,2,128,1,float16,fp8,0,3.7526238759358725
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,4,128,1,float16,float16,0,3.8564907709757485
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,4,128,1,fp8,fp8,0,3.0198825200398765
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,4,128,1,float16,fp8,0,3.85643736521403
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,8,128,1,float16,float16,0,3.9295358657836914
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,8,128,1,float16,fp8,0,3.8735198974609375
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,1,128,1,float16,float16,0,1.9236213366190593
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,1,128,1,float16,fp8,0,1.8850186665852864
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,1,128,1,fp8,fp8,0,1.5114827156066895
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,40,8,128,1,fp8,fp8,0,3.063386599222819
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,2,128,1,float16,float16,0,1.9289333025614421
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,40,128,1,float16,float16,0,2.0479253133138022
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,40,128,1,float16,fp8,0,2.008549372355143
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,2,128,1,float16,fp8,0,1.8837706247965496
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,2,128,1,fp8,fp8,0,1.5142134030659993
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,40,128,1,fp8,fp8,0,1.6758186022440593
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,4,128,1,float16,float16,0,1.9465920130411785
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,4,128,1,fp8,fp8,0,1.5217119852701824
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,4,128,1,float16,fp8,0,1.8929813702901204
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,8,128,1,float16,float16,0,1.9612372716267903
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,8,128,1,fp8,fp8,0,1.5477333068847656
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,1,128,1,float16,float16,0,0.9981386661529541
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,40,128,1,float16,float16,0,1.0489173730214436
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,40,8,128,1,float16,fp8,0,1.912725289662679
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,1,128,1,float16,fp8,0,0.970970630645752
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,40,128,1,float16,fp8,0,1.0230186780293782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,1,128,1,fp8,fp8,0,0.7879892985026041
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,2,128,1,float16,float16,0,0.9979999860127767
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,40,128,1,fp8,fp8,0,0.875221331914266
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,2,128,1,fp8,fp8,0,0.7873546282450358
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,2,128,1,float16,fp8,0,0.9777440230051676
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,4,128,1,float16,float16,0,1.002341349919637
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,4,128,1,float16,fp8,0,0.9775946935017904
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,4,128,1,fp8,fp8,0,0.7925493717193604
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,8,128,1,float16,float16,0,1.0118239720662434
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,8,128,1,float16,fp8,0,0.9878880182902018
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,1,128,1,float16,float16,0,0.5303413470586141
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,1,128,1,float16,fp8,0,0.518175999323527
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,40,8,128,1,fp8,fp8,0,0.8113866647084554
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,1,128,1,fp8,fp8,0,0.42603198687235516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,40,128,1,float16,fp8,0,0.5433493455251058
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,40,128,1,float16,float16,0,0.5523893435796102
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,2,128,1,float16,float16,0,0.5333813428878784
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,2,128,1,float16,fp8,0,0.5198986530303955
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,2,128,1,fp8,fp8,0,0.426144003868103
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,40,128,1,fp8,fp8,0,0.47244266668955487
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,4,128,1,float16,float16,0,0.5377279917399088
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,4,128,1,float16,fp8,0,0.5252213478088379
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,4,128,1,fp8,fp8,0,0.4275306860605876
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,8,128,1,float16,float16,0,0.5392320156097412
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,8,128,1,float16,fp8,0,0.5266773303349813
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,40,128,1,float16,float16,0,0.30615999301274616
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,40,8,128,1,fp8,fp8,0,0.43301868438720703
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,40,128,1,float16,fp8,0,0.30505067110061646
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,1,128,1,float16,float16,0,0.3002506693204244
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,1,128,1,float16,fp8,0,0.28990934292475384
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,1,128,1,fp8,fp8,0,0.23286932706832886
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,40,128,1,fp8,fp8,0,0.26016000906626385
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,2,128,1,float16,float16,0,0.29975465933481854
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,2,128,1,float16,fp8,0,0.2916746735572815
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,4,128,1,float16,float16,0,0.30250134070714313
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,4,128,1,float16,fp8,0,0.29476267099380493
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,4,128,1,fp8,fp8,0,0.23324799537658691
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,8,128,1,float16,fp8,0,0.29713600873947144
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,8,128,1,float16,float16,0,0.3041386604309082
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,2,128,1,fp8,fp8,0,0.23189334074656168
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,40,8,128,1,fp8,fp8,0,0.23704532782236734
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,1,128,1,float16,float16,0,0.164901336034139
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,40,128,1,float16,float16,0,0.17289066314697266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,40,128,1,float16,fp8,0,0.17148800690968832
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,40,128,1,fp8,fp8,0,0.1525813341140747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,1,128,1,float16,fp8,0,0.1628213326136271
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,1,128,1,fp8,fp8,0,0.13825066884358725
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,2,128,1,float16,fp8,0,0.16173866391181946
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,2,128,1,fp8,fp8,0,0.13883733749389648
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,2,128,1,float16,float16,0,0.1655306617418925
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,4,128,1,float16,fp8,0,0.16379732886950174
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,4,128,1,fp8,fp8,0,0.1411946713924408
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,4,128,1,float16,float16,0,0.1662879983584086
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,8,128,1,float16,float16,0,0.16826132933298746
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,8,128,1,float16,fp8,0,0.16381333271662393
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,40,8,128,1,fp8,fp8,0,0.1427893340587616
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,1,128,1,float16,float16,0,5.044111887613933
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,1,128,1,fp8,fp8,0,4.060005187988281
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,2,128,1,float16,float16,0,5.119632085164388
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,2,128,1,fp8,fp8,0,4.047109285990397
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,2,128,1,float16,fp8,0,4.802864074707031
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,1,128,1,float16,fp8,0,4.845631917317708
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,4,128,1,float16,fp8,0,4.861050605773926
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,4,128,1,float16,float16,0,5.094944000244141
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,1,128,1,float16,float16,0,2.4828906059265137
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,4,128,1,fp8,fp8,0,4.07421334584554
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,8,128,1,fp8,fp8,0,4.120746612548828
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,8,128,1,float16,fp8,0,5.047264099121094
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,1,128,1,float16,fp8,0,2.4011306762695312
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,40,8,128,1,float16,float16,0,5.1191145579020185
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,40,128,1,float16,float16,0,2.6364533106486
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,40,128,1,fp8,fp8,0,2.2712586720784507
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,1,128,1,fp8,fp8,0,2.0106612841288247
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,40,128,1,float16,fp8,0,2.5982240041097007
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,2,128,1,float16,float16,0,2.4898880322774253
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,2,128,1,float16,fp8,0,2.4134772618611655
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,2,128,1,fp8,fp8,0,2.0211946169535318
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,4,128,1,float16,fp8,0,2.4130986531575522
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,4,128,1,float16,float16,0,2.4934239387512207
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,4,128,1,fp8,fp8,0,2.0343680381774902
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,8,128,1,float16,float16,0,2.5163413683573403
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,8,128,1,float16,fp8,0,2.4436747233072915
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,1,128,1,float16,float16,0,1.2566986878712971
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,1,128,1,float16,fp8,0,1.2198987007141113
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,40,8,128,1,fp8,fp8,0,2.0657706260681152
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,1,128,1,fp8,fp8,0,1.0229706764221191
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,40,128,1,float16,float16,0,1.3458773295084636
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,2,128,1,float16,fp8,0,1.2277759710947673
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,2,128,1,fp8,fp8,0,1.0262293020884197
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,40,128,1,float16,fp8,0,1.3258559703826904
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,2,128,1,float16,float16,0,1.257754643758138
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,40,128,1,fp8,fp8,0,1.1556533177693684
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,4,128,1,float16,float16,0,1.265669345855713
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,4,128,1,float16,fp8,0,1.2301546732584636
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,4,128,1,fp8,fp8,0,1.037274678548177
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,8,128,1,float16,float16,0,1.278213342030843
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,8,128,1,float16,fp8,0,1.2409333388010662
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,40,8,128,1,fp8,fp8,0,1.0489386717478435
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,40,128,1,float16,float16,0,0.689466635386149
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,40,128,1,float16,fp8,0,0.6773119767506918
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,1,128,1,float16,float16,0,0.6548266808191935
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,1,128,1,float16,fp8,0,0.6368533372879028
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,1,128,1,fp8,fp8,0,0.5396906534830729
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,2,128,1,float16,float16,0,0.6564373175303141
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,40,128,1,fp8,fp8,0,0.6086826721827189
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,2,128,1,float16,fp8,0,0.6399840116500854
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,4,128,1,float16,float16,0,0.6573119958241781
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,2,128,1,fp8,fp8,0,0.5371040105819702
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,4,128,1,float16,fp8,0,0.6422239939371744
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,4,128,1,fp8,fp8,0,0.5466933250427246
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,8,128,1,float16,fp8,0,0.6475199858347574
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,1,128,1,float16,float16,0,0.35304001967112225
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,8,128,1,float16,float16,0,0.6658293406168619
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,40,8,128,1,fp8,fp8,0,0.5528639952341715
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,40,128,1,float16,float16,0,0.3717120091120402
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,1,128,1,float16,fp8,0,0.345360000928243
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,40,128,1,float16,fp8,0,0.3648800055185954
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,40,128,1,fp8,fp8,0,0.3338346481323242
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,1,128,1,fp8,fp8,0,0.2815093398094177
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,2,128,1,float16,fp8,0,0.34702932834625244
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,2,128,1,float16,float16,0,0.3548106749852498
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,2,128,1,fp8,fp8,0,0.2845066587130229
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,4,128,1,float16,float16,0,0.3572426637013753
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,4,128,1,fp8,fp8,0,0.28570665915807086
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,8,128,1,float16,float16,0,0.35782400767008465
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,8,128,1,float16,fp8,0,0.35089067618052167
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,4,128,1,float16,fp8,0,0.3487786849339803
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,40,8,128,1,fp8,fp8,0,0.29206933577855426
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,40,128,1,float16,float16,0,0.2110933264096578
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,1,128,1,float16,float16,0,0.1949333349863688
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,40,128,1,float16,fp8,0,0.20974934101104736
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,1,128,1,float16,fp8,0,0.18870399395624796
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,1,128,1,fp8,fp8,0,0.16184533635775247
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,40,128,1,fp8,fp8,0,0.18108266592025757
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,2,128,1,float16,fp8,0,0.19100266695022583
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,2,128,1,fp8,fp8,0,0.16236799955368042
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,2,128,1,float16,float16,0,0.19637866814931235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,4,128,1,float16,float16,0,0.1962560017903646
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,4,128,1,float16,fp8,0,0.19238932927449545
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,8,128,1,float16,float16,0,0.1978773276011149
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,8,128,1,fp8,fp8,0,0.16708266735076904
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,8,128,1,float16,fp8,0,0.1933599909146627
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,40,4,128,1,fp8,fp8,0,0.16368533174196878
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,40,128,1,float16,float16,0,0.12017066280047099
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,1,128,1,float16,float16,0,0.1142080028851827
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,40,128,1,fp8,fp8,0,0.1106773316860199
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,1,128,1,float16,fp8,0,0.11192533373832703
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,1,128,1,fp8,fp8,0,0.09968533118565877
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,40,128,1,float16,fp8,0,0.11847999691963196
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,2,128,1,float16,float16,0,0.11396800478299458
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,2,128,1,float16,fp8,0,0.11198932925860088
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,2,128,1,fp8,fp8,0,0.0992693305015564
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,4,128,1,float16,float16,0,0.11499200264612834
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,4,128,1,float16,fp8,0,0.1118986705938975
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,4,128,1,fp8,fp8,0,0.10038933157920837
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,8,128,1,float16,float16,0,0.11563733220100403
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,8,128,1,fp8,fp8,0,0.10354666908582051
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,40,8,128,1,float16,fp8,0,0.11378666758537292
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,1,128,1,float16,float16,0,5.022122701009114
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,2,128,1,float16,fp8,0,4.793045361836751
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,2,128,1,float16,float16,0,4.830608050028483
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,2,128,1,fp8,fp8,0,4.00872008005778
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,1,128,1,fp8,fp8,0,3.9917065302530923
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,1,128,1,float16,fp8,0,4.985232035319011
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,4,128,1,float16,float16,0,4.896608034769694
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,4,128,1,float16,fp8,0,5.093498547871907
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,1,128,1,float16,float16,0,2.3458827336629233
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,4,128,1,fp8,fp8,0,4.062495867411296
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,8,128,1,float16,fp8,0,4.979562759399414
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,8,128,1,float16,float16,0,5.051679929097493
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,1,128,1,float16,fp8,0,2.3454507191975913
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,40,8,128,1,fp8,fp8,0,4.135583877563477
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,40,128,1,float16,float16,0,2.8173653284708657
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,40,128,1,fp8,fp8,0,2.3167893091837564
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,1,128,1,fp8,fp8,0,1.9800052642822266
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,40,128,1,float16,fp8,0,2.755429267883301
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,2,128,1,float16,float16,0,2.355269273122152
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,2,128,1,float16,fp8,0,2.3569119771321616
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,2,128,1,fp8,fp8,0,2.013136068979899
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,4,128,1,float16,fp8,0,2.4429492950439453
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,4,128,1,float16,float16,0,2.4497599601745605
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,4,128,1,fp8,fp8,0,2.0348854064941406
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,8,128,1,float16,fp8,0,2.483935991923014
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,8,128,1,float16,float16,0,2.4825332959493003
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,1,128,1,float16,float16,0,1.1915466785430908
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,1,128,1,float16,fp8,0,1.1846986611684163
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,40,8,128,1,fp8,fp8,0,2.060208002726237
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,1,128,1,fp8,fp8,0,0.9847466945648193
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,2,128,1,float16,float16,0,1.1911253134409587
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,40,128,1,float16,float16,0,1.3819999694824219
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,2,128,1,float16,fp8,0,1.1945226987202961
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,40,128,1,float16,fp8,0,1.3615412712097168
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,40,128,1,fp8,fp8,0,1.1604586442311604
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,2,128,1,fp8,fp8,0,1.0000800291697185
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,4,128,1,float16,float16,0,1.2010666529337566
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,4,128,1,float16,fp8,0,1.2057279745737712
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,4,128,1,fp8,fp8,0,1.0201173623402913
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,8,128,1,float16,float16,0,1.247162659962972
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,8,128,1,float16,fp8,0,1.2454559803009033
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,1,128,1,float16,fp8,0,0.605125347773234
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,40,8,128,1,fp8,fp8,0,1.0393226941426594
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,1,128,1,float16,float16,0,0.6049226522445679
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,1,128,1,fp8,fp8,0,0.4968479871749878
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,40,128,1,float16,float16,0,0.7004053592681885
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,40,128,1,float16,fp8,0,0.6835306485493978
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,40,128,1,fp8,fp8,0,0.5874293247858683
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,2,128,1,float16,float16,0,0.6105706691741943
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,2,128,1,float16,fp8,0,0.6084479888280233
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,2,128,1,fp8,fp8,0,0.5045119921366373
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,4,128,1,float16,float16,0,0.6130293210347494
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,4,128,1,float16,fp8,0,0.6150613228480021
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,4,128,1,fp8,fp8,0,0.5122773249944051
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,8,128,1,float16,float16,0,0.622218648592631
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,8,128,1,float16,fp8,0,0.6204799811045328
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,1,128,1,float16,float16,0,0.31464533011118573
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,40,8,128,1,fp8,fp8,0,0.5231146812438965
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,1,128,1,float16,fp8,0,0.3159839908281962
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,1,128,1,fp8,fp8,0,0.260261336962382
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,40,128,1,float16,float16,0,0.36164267857869464
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,40,128,1,fp8,fp8,0,0.30249067147572833
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,40,128,1,float16,fp8,0,0.35395201047261554
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,2,128,1,float16,float16,0,0.3158773382504781
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,2,128,1,float16,fp8,0,0.3172159989674886
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,2,128,1,fp8,fp8,0,0.2619040012359619
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,4,128,1,float16,float16,0,0.3219946622848511
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,4,128,1,float16,fp8,0,0.32132800420125324
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,4,128,1,fp8,fp8,0,0.265173335870107
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,8,128,1,float16,float16,0,0.3240266640981038
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,8,128,1,float16,fp8,0,0.32449066638946533
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,40,8,128,1,fp8,fp8,0,0.2697226603825887
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,40,128,1,float16,float16,0,0.1952213247617086
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,40,128,1,float16,fp8,0,0.19073599576950073
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,1,128,1,float16,float16,0,0.17134400208791098
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,1,128,1,float16,fp8,0,0.1707786719004313
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,40,128,1,fp8,fp8,0,0.15819733341534933
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,1,128,1,fp8,fp8,0,0.1320799986521403
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,2,128,1,float16,fp8,0,0.17237865924835205
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,2,128,1,fp8,fp8,0,0.13274666666984558
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,4,128,1,float16,float16,0,0.1748746633529663
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,4,128,1,float16,fp8,0,0.17505067586898804
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,2,128,1,float16,float16,0,0.17254932721455893
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,4,128,1,fp8,fp8,0,0.13475733002026877
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,8,128,1,float16,fp8,0,0.1753013332684835
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,8,128,1,float16,float16,0,0.17657599846522012
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,40,8,128,1,fp8,fp8,0,0.13678933183352152
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,40,128,1,float16,float16,0,0.10614933570226033
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,1,128,1,float16,float16,0,0.09082667032877605
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,1,128,1,float16,fp8,0,0.09097066521644592
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,40,128,1,float16,fp8,0,0.10333333412806193
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,40,128,1,fp8,fp8,0,0.08717866738637288
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,2,128,1,float16,float16,0,0.0918239951133728
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,1,128,1,fp8,fp8,0,0.0747626672188441
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,2,128,1,float16,fp8,0,0.09122133255004883
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,2,128,1,fp8,fp8,0,0.07483733197053273
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,4,128,1,float16,float16,0,0.09240000446637471
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,4,128,1,float16,fp8,0,0.092031995455424
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,4,128,1,fp8,fp8,0,0.0765119989713033
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,8,128,1,float16,fp8,0,0.09314666191736858
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,8,128,1,fp8,fp8,0,0.07803733150164287
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,40,8,128,1,float16,float16,0,0.09311466415723164
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,40,128,1,float16,float16,0,0.058746665716171265
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,40,128,1,float16,fp8,0,0.05653866628805796
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,1,128,1,float16,fp8,0,0.0525493323802948
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,1,128,1,fp8,fp8,0,0.04426133135954539
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,1,128,1,float16,float16,0,0.05217066903909048
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,2,128,1,float16,float16,0,0.052005335688591
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,40,128,1,fp8,fp8,0,0.051301335295041404
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,2,128,1,float16,fp8,0,0.05241066714127859
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,2,128,1,fp8,fp8,0,0.043951998154322304
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,4,128,1,float16,float16,0,0.0526506652434667
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,4,128,1,float16,fp8,0,0.053727999329566956
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,4,128,1,fp8,fp8,0,0.04594666759173075
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,8,128,1,float16,float16,0,0.053344001372655235
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,8,128,1,float16,fp8,0,0.05356800059477488
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,40,8,128,1,fp8,fp8,0,0.04850666721661886
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,1,128,1,float16,fp8,0,3.676874796549479
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,1,128,1,fp8,fp8,0,3.304837226867676
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,2,128,1,float16,fp8,0,3.766453425089518
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,1,128,1,float16,float16,0,3.7912158966064453
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,2,128,1,float16,float16,0,3.7807785669962564
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,2,128,1,fp8,fp8,0,3.326666514078776
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,4,128,1,float16,float16,0,3.835066795349121
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,4,128,1,float16,fp8,0,3.8486239115397134
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,4,128,1,fp8,fp8,0,3.3920532862345376
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,1,128,1,float16,float16,0,1.8225599924723308
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,8,128,1,float16,float16,0,3.9135360717773438
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,8,128,1,float16,fp8,0,3.9081013997395835
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,40,8,128,1,fp8,fp8,0,3.4627307256062827
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,1,128,1,float16,fp8,0,1.8188799222310383
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,40,128,1,float16,float16,0,2.321664015452067
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,1,128,1,fp8,fp8,0,1.6519145965576172
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,40,128,1,float16,fp8,0,2.219663937886556
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,40,128,1,fp8,fp8,0,2.0071147282918296
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,2,128,1,float16,float16,0,1.8346880276997883
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,2,128,1,float16,fp8,0,1.8736426035563152
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,2,128,1,fp8,fp8,0,1.66211732228597
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,4,128,1,float16,float16,0,1.910037358601888
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,4,128,1,float16,fp8,0,1.9208106994628906
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,4,128,1,fp8,fp8,0,1.703279972076416
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,8,128,1,float16,fp8,0,1.9480373064676921
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,8,128,1,float16,float16,0,1.9625546137491863
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,1,128,1,float16,float16,0,0.9269440174102783
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,40,8,128,1,fp8,fp8,0,1.725338617960612
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,1,128,1,float16,fp8,0,0.9250666300455729
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,40,128,1,float16,float16,0,1.1487733523050945
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,1,128,1,fp8,fp8,0,0.8053973515828451
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,2,128,1,float16,float16,0,0.9259946346282959
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,2,128,1,float16,fp8,0,0.9300320148468018
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,2,128,1,fp8,fp8,0,0.8339626789093018
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,40,128,1,float16,fp8,0,1.106160004933675
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,40,128,1,fp8,fp8,0,1.0090666611989338
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,4,128,1,float16,float16,0,0.9371679623921713
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,4,128,1,float16,fp8,0,0.9415787061055502
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,4,128,1,fp8,fp8,0,0.8499626318613688
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,8,128,1,float16,fp8,0,0.9802079995473226
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,8,128,1,fp8,fp8,0,0.8624853293100992
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,40,8,128,1,float16,float16,0,0.977888027826945
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,1,128,1,float16,float16,0,0.47141865889231366
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,1,128,1,float16,fp8,0,0.4692800045013428
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,40,128,1,float16,float16,0,0.5713066657384237
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,40,128,1,float16,fp8,0,0.5512266556421915
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,1,128,1,fp8,fp8,0,0.4098666508992513
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,2,128,1,float16,float16,0,0.4732853174209595
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,2,128,1,float16,fp8,0,0.4720960060755412
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,40,128,1,fp8,fp8,0,0.5063466628392538
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,2,128,1,fp8,fp8,0,0.4165653387705485
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,4,128,1,float16,fp8,0,0.4802560011545817
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,4,128,1,float16,float16,0,0.478005329767863
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,4,128,1,fp8,fp8,0,0.42230931917826336
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,8,128,1,float16,float16,0,0.4871786832809448
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,1,128,1,float16,float16,0,0.2439840038617452
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,8,128,1,float16,fp8,0,0.4855039914449056
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,40,8,128,1,fp8,fp8,0,0.43330665429433185
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,1,128,1,float16,fp8,0,0.24553066492080688
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,40,128,1,float16,fp8,0,0.28885332743326825
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,40,128,1,float16,float16,0,0.29758934179941815
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,40,128,1,fp8,fp8,0,0.25992000102996826
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,1,128,1,fp8,fp8,0,0.21581333875656128
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,2,128,1,float16,fp8,0,0.24778133630752563
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,2,128,1,float16,float16,0,0.24631466468175253
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,2,128,1,fp8,fp8,0,0.21641600131988525
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,4,128,1,float16,float16,0,0.24945066372553507
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,4,128,1,float16,fp8,0,0.25195199251174927
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,4,128,1,fp8,fp8,0,0.22102399667104086
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,8,128,1,float16,float16,0,0.2537386616071065
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,8,128,1,float16,fp8,0,0.2550133268038432
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,40,8,128,1,fp8,fp8,0,0.22387200593948364
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,40,128,1,float16,float16,0,0.1600266695022583
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,1,128,1,float16,float16,0,0.13263466954231262
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,40,128,1,float16,fp8,0,0.15525866548220316
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,1,128,1,fp8,fp8,0,0.10995200276374817
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,2,128,1,float16,float16,0,0.13380799690882364
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,40,128,1,fp8,fp8,0,0.1373973290125529
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,1,128,1,float16,fp8,0,0.1332266628742218
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,2,128,1,float16,fp8,0,0.13395733634630838
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,2,128,1,fp8,fp8,0,0.11061867078145345
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,4,128,1,float16,float16,0,0.13609066605567932
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,4,128,1,float16,fp8,0,0.1353600025177002
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,4,128,1,fp8,fp8,0,0.11261866490046184
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,8,128,1,float16,float16,0,0.13754133383433023
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,8,128,1,float16,fp8,0,0.1377173364162445
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,1,128,1,float16,float16,0,0.07489599784215291
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,40,128,1,float16,float16,0,0.09166399637858073
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,40,128,1,float16,fp8,0,0.08868799606959026
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,40,8,128,1,fp8,fp8,0,0.11546132961908977
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,40,128,1,fp8,fp8,0,0.07685866455237071
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,1,128,1,float16,fp8,0,0.07514133552710216
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,1,128,1,fp8,fp8,0,0.06285866598288219
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,2,128,1,float16,float16,0,0.07508799930413564
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,2,128,1,float16,fp8,0,0.0748586654663086
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,4,128,1,float16,float16,0,0.0757013310988744
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,2,128,1,fp8,fp8,0,0.06402133405208588
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,4,128,1,fp8,fp8,0,0.06470400094985962
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,4,128,1,float16,fp8,0,0.07632533212502797
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,8,128,1,float16,float16,0,0.07720000048478444
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,8,128,1,float16,fp8,0,0.07729599873224895
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,40,8,128,1,fp8,fp8,0,0.06683200101057689
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,40,128,1,float16,float16,0,0.05028266708056132
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,40,128,1,float16,fp8,0,0.04955733319123586
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,40,128,1,fp8,fp8,0,0.04597333570321401
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,1,128,1,float16,float16,0,0.043605332573254905
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,1,128,1,fp8,fp8,0,0.03794133414824804
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,2,128,1,float16,float16,0,0.04384533564249674
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,1,128,1,float16,fp8,0,0.0439626673857371
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,2,128,1,float16,fp8,0,0.04331199824810028
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,2,128,1,fp8,fp8,0,0.03832533210515976
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,4,128,1,float16,float16,0,0.04451199869314829
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,4,128,1,float16,fp8,0,0.04417600234349569
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,4,128,1,fp8,fp8,0,0.04035199930270513
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,8,128,1,float16,fp8,0,0.045552000403404236
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,8,128,1,float16,float16,0,0.0444106658299764
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,40,128,1,float16,fp8,0,0.030058667063713074
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,40,128,1,float16,float16,0,0.030794667700926464
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,40,128,1,fp8,fp8,0,0.0295413335164388
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,1,128,1,float16,float16,0,0.028202667832374573
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,40,8,128,1,fp8,fp8,0,0.04231466849644979
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,1,128,1,float16,fp8,0,0.028607999285062153
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,1,128,1,fp8,fp8,0,0.026357332865397137
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,2,128,1,float16,float16,0,0.02842666705449422
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,2,128,1,float16,fp8,0,0.02861333390076955
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,4,128,1,float16,float16,0,0.028533334533373516
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,2,128,1,fp8,fp8,0,0.026634665826956432
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,4,128,1,float16,fp8,0,0.028981332977612812
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,8,128,1,float16,float16,0,0.02921066681543986
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,4,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,8,128,1,float16,fp8,0,0.02918400118748347
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,40,8,128,1,fp8,fp8,0,0.029135999580224354
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,1,128,1,fp8,fp8,0,1.3488160769144695
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,1,128,1,float16,fp8,0,1.5616532961527507
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,1,128,1,float16,float16,0,1.5593066215515137
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,2,128,1,float16,float16,0,1.5765280723571777
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,2,128,1,float16,fp8,0,1.6089919408162434
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,4,128,1,float16,float16,0,1.6370453834533691
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,2,128,1,fp8,fp8,0,1.3652853965759277
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,4,128,1,float16,fp8,0,1.6498986879984539
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,1,128,1,float16,float16,0,0.7898879845937093
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,4,128,1,fp8,fp8,0,1.4038079579671223
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,8,128,1,float16,float16,0,1.68940273920695
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,8,128,1,fp8,fp8,0,1.4464799563090007
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,1,128,1,float16,fp8,0,0.7898666858673096
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,40,128,1,float16,float16,0,0.9991520245869955
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,40,8,128,1,float16,fp8,0,1.686954657236735
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,40,128,1,float16,fp8,0,0.9799946943918864
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,40,128,1,fp8,fp8,0,0.856053352355957
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,1,128,1,fp8,fp8,0,0.6661920150121053
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,2,128,1,float16,float16,0,0.7947306632995605
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,2,128,1,float16,fp8,0,0.7936053276062012
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,2,128,1,fp8,fp8,0,0.6897119681040446
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,4,128,1,float16,float16,0,0.8094293276468912
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,4,128,1,float16,fp8,0,0.8064106305440267
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,4,128,1,fp8,fp8,0,0.7080533504486084
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,8,128,1,float16,float16,0,0.8384799957275391
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,8,128,1,float16,fp8,0,0.8397493362426758
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,1,128,1,float16,float16,0,0.4026506741841634
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,1,128,1,float16,fp8,0,0.40373865763346356
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,40,8,128,1,fp8,fp8,0,0.7220053672790527
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,40,128,1,float16,float16,0,0.5073599815368652
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,40,128,1,float16,fp8,0,0.49037333329518634
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,2,128,1,float16,float16,0,0.40558401743570965
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,1,128,1,fp8,fp8,0,0.3375999927520752
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,2,128,1,float16,fp8,0,0.40541334946950275
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,40,128,1,fp8,fp8,0,0.4369066556294759
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,2,128,1,fp8,fp8,0,0.3426293134689331
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,4,128,1,float16,float16,0,0.4097493489583333
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,4,128,1,float16,fp8,0,0.4102933406829834
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,4,128,1,fp8,fp8,0,0.35390400886535645
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,8,128,1,float16,float16,0,0.41842134793599445
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,8,128,1,float16,fp8,0,0.41816532611846924
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,1,128,1,float16,float16,0,0.21059733629226685
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,40,8,128,1,fp8,fp8,0,0.36666667461395264
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,40,128,1,float16,float16,0,0.2661653359731038
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,1,128,1,float16,fp8,0,0.21035732825597128
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,1,128,1,fp8,fp8,0,0.1782133380572001
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,40,128,1,float16,fp8,0,0.2568639914194743
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,40,128,1,fp8,fp8,0,0.22648000717163086
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,2,128,1,float16,float16,0,0.21171200275421143
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,2,128,1,fp8,fp8,0,0.17950934171676636
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,2,128,1,float16,fp8,0,0.2121386726697286
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,4,128,1,float16,float16,0,0.21531200408935547
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,4,128,1,float16,fp8,0,0.21547200282414755
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,4,128,1,fp8,fp8,0,0.18380266427993774
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,8,128,1,float16,float16,0,0.21857066949208578
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,8,128,1,fp8,fp8,0,0.1899999976158142
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,40,8,128,1,float16,fp8,0,0.21905599037806192
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,40,128,1,float16,float16,0,0.14230933785438538
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,40,128,1,float16,fp8,0,0.13894933462142944
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,1,128,1,float16,float16,0,0.11469866832097371
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,1,128,1,float16,fp8,0,0.1151626706123352
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,1,128,1,fp8,fp8,0,0.09874666730562846
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,40,128,1,fp8,fp8,0,0.12684266765912375
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,2,128,1,fp8,fp8,0,0.0993386705716451
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,2,128,1,float16,float16,0,0.11551466584205627
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,4,128,1,float16,fp8,0,0.11737066507339478
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,4,128,1,float16,float16,0,0.11754133303960164
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,2,128,1,float16,fp8,0,0.1157866617043813
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,4,128,1,fp8,fp8,0,0.1014453371365865
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,8,128,1,float16,float16,0,0.11875200271606445
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,8,128,1,float16,fp8,0,0.11905599633852641
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,40,8,128,1,fp8,fp8,0,0.1034346620241801
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,40,128,1,float16,float16,0,0.08739200234413147
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,40,128,1,float16,fp8,0,0.08416000008583069
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,1,128,1,float16,fp8,0,0.0664160003264745
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,40,128,1,fp8,fp8,0,0.07197866837183635
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,1,128,1,fp8,fp8,0,0.05600533386071523
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,2,128,1,float16,float16,0,0.06684799989064534
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,1,128,1,float16,float16,0,0.06643199920654297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,2,128,1,float16,fp8,0,0.06676266590754192
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,2,128,1,fp8,fp8,0,0.05692266424496969
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,4,128,1,float16,float16,0,0.06724800169467926
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,4,128,1,float16,fp8,0,0.06754666566848755
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,4,128,1,fp8,fp8,0,0.05819733440876007
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,8,128,1,float16,float16,0,0.06874666611353557
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,8,128,1,float16,fp8,0,0.06901333232720692
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,40,128,1,float16,float16,0,0.0470773329337438
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,40,8,128,1,fp8,fp8,0,0.059845333298047386
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,1,128,1,float16,float16,0,0.03935466706752777
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,1,128,1,float16,fp8,0,0.04001600046952566
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,2,128,1,float16,float16,0,0.03984000037113825
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,1,128,1,fp8,fp8,0,0.03517866631348928
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,40,128,1,fp8,fp8,0,0.04310933252175649
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,2,128,1,float16,fp8,0,0.03959999978542328
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,40,128,1,float16,fp8,0,0.045978665351867676
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,4,128,1,float16,float16,0,0.04008000095685323
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,4,128,1,float16,fp8,0,0.04074666649103165
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,4,128,1,fp8,fp8,0,0.037605332831541695
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,8,128,1,float16,float16,0,0.04137066751718521
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,8,128,1,float16,fp8,0,0.04121066629886627
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,8,128,1,fp8,fp8,0,0.039818666875362396
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,40,2,128,1,fp8,fp8,0,0.036159999668598175
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,40,128,1,float16,float16,0,0.029680001238981884
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,1,128,1,float16,float16,0,0.02736533433198929
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,1,128,1,float16,fp8,0,0.02771199991305669
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,40,128,1,fp8,fp8,0,0.029045333464940388
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,40,128,1,float16,fp8,0,0.029616000751654308
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,1,128,1,fp8,fp8,0,0.026186667382717133
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,2,128,1,float16,float16,0,0.027434666951497395
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,2,128,1,float16,fp8,0,0.028016000986099243
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,4,128,1,float16,float16,0,0.02739199995994568
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,2,128,1,fp8,fp8,0,0.0260959987839063
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,4,128,1,float16,fp8,0,0.02829333394765854
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,4,128,1,fp8,fp8,0,0.027130665878454845
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,8,128,1,float16,float16,0,0.02831999957561493
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,8,128,1,fp8,fp8,0,0.02897600084543228
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,40,8,128,1,float16,fp8,0,0.028543998797734577
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,40,128,1,float16,float16,0,0.022154666483402252
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,40,128,1,float16,fp8,0,0.022645334402720135
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,1,128,1,float16,float16,0,0.02092266579469045
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,1,128,1,float16,fp8,0,0.02085866779088974
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,1,128,1,fp8,fp8,0,0.0198186660806338
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,2,128,1,float16,float16,0,0.02092266579469045
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,40,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,2,128,1,float16,fp8,0,0.02080533280968666
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,2,128,1,fp8,fp8,0,0.019920000185569126
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,4,128,1,float16,float16,0,0.02088533341884613
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,4,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,4,128,1,fp8,fp8,0,0.020207999895016353
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,8,128,1,float16,float16,0,0.020992000897725422
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,8,128,1,float16,fp8,0,0.021589333812395733
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,40,8,128,1,fp8,fp8,0,0.02060266708334287
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,1,128,1,float16,float16,0,0.7321173350016276
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,1,128,1,fp8,fp8,0,0.6605973243713379
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,1,128,1,float16,fp8,0,0.7314346631368002
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,2,128,1,float16,fp8,0,0.7395573457082113
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,2,128,1,fp8,fp8,0,0.6858879725138346
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,2,128,1,float16,float16,0,0.7374186515808105
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,4,128,1,float16,float16,0,0.7454400062561035
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,4,128,1,float16,fp8,0,0.7505333423614502
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,1,128,1,float16,float16,0,0.37804265817006427
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,4,128,1,fp8,fp8,0,0.7125439643859863
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,8,128,1,float16,float16,0,0.7743626435597738
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,8,128,1,float16,fp8,0,0.7814666430155436
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,40,8,128,1,fp8,fp8,0,0.7243200143178304
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,40,128,1,float16,float16,0,0.4976053237915039
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,40,128,1,float16,fp8,0,0.4742826620737712
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,40,128,1,fp8,fp8,0,0.434442679087321
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,1,128,1,fp8,fp8,0,0.3356320063273112
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,1,128,1,float16,fp8,0,0.376853346824646
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,2,128,1,float16,float16,0,0.3790880044301351
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,2,128,1,float16,fp8,0,0.378490686416626
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,2,128,1,fp8,fp8,0,0.34507731596628827
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,4,128,1,float16,float16,0,0.38399465878804523
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,4,128,1,fp8,fp8,0,0.3524693250656128
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,4,128,1,float16,fp8,0,0.38596800963083905
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,8,128,1,float16,float16,0,0.3922719955444336
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,8,128,1,float16,fp8,0,0.39124266306559247
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,1,128,1,float16,float16,0,0.1985493302345276
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,1,128,1,float16,fp8,0,0.19958933194478354
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,40,8,128,1,fp8,fp8,0,0.36910398801167804
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,1,128,1,fp8,fp8,0,0.17819732427597046
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,40,128,1,float16,float16,0,0.26337599754333496
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,40,128,1,float16,fp8,0,0.25414933760960895
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,40,128,1,fp8,fp8,0,0.2256586750348409
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,2,128,1,float16,float16,0,0.2007733384768168
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,2,128,1,float16,fp8,0,0.19977066914240518
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,2,128,1,fp8,fp8,0,0.18069867293039957
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,4,128,1,float16,float16,0,0.20283732811609903
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,4,128,1,fp8,fp8,0,0.18473066886266074
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,8,128,1,float16,float16,0,0.2071626583735148
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,4,128,1,float16,fp8,0,0.20341867208480835
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,8,128,1,float16,fp8,0,0.2065920035044352
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,40,8,128,1,fp8,fp8,0,0.18939733505249023
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,1,128,1,float16,float16,0,0.10949866970380147
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,1,128,1,float16,fp8,0,0.11041067043940227
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,40,128,1,float16,float16,0,0.14194666345914206
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,40,128,1,float16,fp8,0,0.13793067137400308
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,2,128,1,float16,float16,0,0.10945066809654236
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,40,128,1,fp8,fp8,0,0.12355200449625652
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,1,128,1,fp8,fp8,0,0.09843200445175171
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,2,128,1,float16,fp8,0,0.11108266313870747
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,2,128,1,fp8,fp8,0,0.09878933429718018
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,4,128,1,float16,fp8,0,0.11181333661079407
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,4,128,1,float16,float16,0,0.11123200257619222
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,4,128,1,fp8,fp8,0,0.1006719966729482
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,8,128,1,float16,float16,0,0.11359467109044392
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,8,128,1,float16,fp8,0,0.11392533779144287
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,40,8,128,1,fp8,fp8,0,0.10326932867368062
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,1,128,1,float16,float16,0,0.06177600224812826
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,40,128,1,float16,float16,0,0.08329600095748901
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,1,128,1,float16,fp8,0,0.06214400132497152
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,1,128,1,fp8,fp8,0,0.055786664287249245
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,40,128,1,float16,fp8,0,0.08018666505813599
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,2,128,1,float16,float16,0,0.062261333068211876
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,40,128,1,fp8,fp8,0,0.0713973343372345
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,2,128,1,float16,fp8,0,0.06206400195757548
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,2,128,1,fp8,fp8,0,0.056608001391092934
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,4,128,1,float16,float16,0,0.06378133098284404
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,4,128,1,fp8,fp8,0,0.05756799876689911
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,4,128,1,float16,fp8,0,0.06347199777762096
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,8,128,1,float16,fp8,0,0.0652106652657191
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,8,128,1,float16,float16,0,0.0650186687707901
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,40,8,128,1,fp8,fp8,0,0.0592853327592214
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,40,128,1,float16,float16,0,0.044719999035199486
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,1,128,1,float16,float16,0,0.037845333417256675
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,40,128,1,float16,fp8,0,0.04412800073623657
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,1,128,1,fp8,fp8,0,0.035162667433420815
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,1,128,1,float16,fp8,0,0.03833599885304769
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,2,128,1,float16,float16,0,0.03782399992148081
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,2,128,1,float16,fp8,0,0.038362666964530945
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,40,128,1,fp8,fp8,0,0.04377066592375437
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,2,128,1,fp8,fp8,0,0.03573333223660787
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,4,128,1,float16,fp8,0,0.039290666580200195
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,4,128,1,float16,float16,0,0.03845866769552231
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,4,128,1,fp8,fp8,0,0.03736000011364619
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,8,128,1,float16,fp8,0,0.04021333406368891
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,8,128,1,float16,float16,0,0.039317332208156586
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,40,8,128,1,fp8,fp8,0,0.039290666580200195
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,40,128,1,float16,float16,0,0.030095999439557392
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,40,128,1,fp8,fp8,0,0.028901333610216778
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,1,128,1,float16,float16,0,0.027823999524116516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,1,128,1,fp8,fp8,0,0.02584533393383026
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,2,128,1,float16,float16,0,0.02770666778087616
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,40,128,1,float16,fp8,0,0.029535998900731403
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,1,128,1,float16,fp8,0,0.02811199923356374
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,2,128,1,float16,fp8,0,0.027850667635599773
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,4,128,1,float16,float16,0,0.028234665592511494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,2,128,1,fp8,fp8,0,0.02619733413060506
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,4,128,1,fp8,fp8,0,0.027280000348885853
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,8,128,1,float16,float16,0,0.028325334191322327
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,4,128,1,float16,fp8,0,0.028373333315054577
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,8,128,1,float16,fp8,0,0.028991999725500744
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,40,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,40,128,1,float16,float16,0,0.020992000897725422
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,40,8,128,1,fp8,fp8,0,0.02773866554101308
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,40,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,1,128,1,float16,fp8,0,0.020010666300853092
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,1,128,1,float16,float16,0,0.019823999454577763
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,1,128,1,fp8,fp8,0,0.019808000574509304
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,2,128,1,float16,fp8,0,0.019920000185569126
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,2,128,1,float16,float16,0,0.019626667102177937
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,4,128,1,float16,fp8,0,0.02015999952952067
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,2,128,1,fp8,fp8,0,0.019808000574509304
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,4,128,1,float16,float16,0,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,4,128,1,fp8,fp8,0,0.02033599962790807
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,8,128,1,float16,fp8,0,0.020234666764736176
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,8,128,1,float16,float16,0,0.01989866668979327
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,40,8,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,40,128,1,float16,float16,0,0.017610666652520496
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,40,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,40,128,1,fp8,fp8,0,0.01802666609485944
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,1,128,1,float16,fp8,0,0.017466666797796886
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,1,128,1,fp8,fp8,0,0.01754666616519292
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,2,128,1,float16,fp8,0,0.01775466650724411
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,2,128,1,fp8,fp8,0,0.01759999990463257
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,1,128,1,float16,float16,0,0.017152000218629837
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,4,128,1,float16,float16,0,0.01720533271630605
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,2,128,1,float16,float16,0,0.017242666333913803
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,4,128,1,float16,fp8,0,0.017653333644072216
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,4,128,1,fp8,fp8,0,0.017610666652520496
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,8,128,1,float16,float16,0,0.01729600007335345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,8,128,1,float16,fp8,0,0.017968000223239262
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,40,8,128,1,fp8,fp8,0,0.017840000490347546
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,1,128,1,fp8,fp8,0,0.4651679992675781
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,1,128,1,float16,float16,0,0.4602133433024089
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,2,128,1,float16,float16,0,0.46262399355570477
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,1,128,1,float16,fp8,0,0.4599733352661133
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,2,128,1,fp8,fp8,0,0.46889599164326984
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,2,128,1,float16,fp8,0,0.4631839990615845
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,4,128,1,float16,float16,0,0.46798932552337646
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,4,128,1,float16,fp8,0,0.46747732162475586
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,1,128,1,float16,float16,0,0.23727999130884805
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,4,128,1,fp8,fp8,0,0.48122668266296387
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,8,128,1,float16,float16,0,0.47725868225097656
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,8,128,1,fp8,fp8,0,0.49380799134572345
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,40,8,128,1,float16,fp8,0,0.4747146765391032
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,40,128,1,float16,fp8,0,0.2662880023320516
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,40,128,1,float16,float16,0,0.2739413380622864
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,1,128,1,float16,fp8,0,0.23690134286880493
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,40,128,1,fp8,fp8,0,0.2874346574147542
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,1,128,1,fp8,fp8,0,0.24198933442433676
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,2,128,1,float16,float16,0,0.23891733090082803
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,2,128,1,fp8,fp8,0,0.2437866727511088
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,2,128,1,float16,fp8,0,0.2389706571896871
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,4,128,1,float16,float16,0,0.24279467264811197
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,4,128,1,fp8,fp8,0,0.24744532505671182
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,4,128,1,float16,fp8,0,0.24222399791081747
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,8,128,1,float16,float16,0,0.24554665883382162
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,8,128,1,float16,fp8,0,0.245088001092275
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,1,128,1,float16,float16,0,0.12691199779510498
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,1,128,1,float16,fp8,0,0.12723732988039652
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,1,128,1,fp8,fp8,0,0.13124266266822815
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,40,8,128,1,fp8,fp8,0,0.25177067518234253
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,40,128,1,float16,float16,0,0.14682132999102274
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,2,128,1,float16,float16,0,0.12801600495974222
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,40,128,1,fp8,fp8,0,0.1539253294467926
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,40,128,1,float16,fp8,0,0.14246400197347006
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,2,128,1,float16,fp8,0,0.12814933061599731
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,2,128,1,fp8,fp8,0,0.13210666179656982
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,4,128,1,float16,float16,0,0.1291146675745646
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,4,128,1,fp8,fp8,0,0.13366400202115378
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,4,128,1,float16,fp8,0,0.1306826670964559
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,8,128,1,float16,float16,0,0.1306880017121633
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,8,128,1,float16,fp8,0,0.13133866588274637
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,40,8,128,1,fp8,fp8,0,0.1361066699028015
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,40,128,1,float16,float16,0,0.08241599798202515
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,40,128,1,float16,fp8,0,0.08055466910203297
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,1,128,1,float16,float16,0,0.07026133437951405
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,40,128,1,fp8,fp8,0,0.08726400136947632
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,1,128,1,fp8,fp8,0,0.07340799768765767
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,1,128,1,float16,fp8,0,0.07122133175532024
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,2,128,1,float16,float16,0,0.07054399947325389
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,2,128,1,float16,fp8,0,0.07098666826883952
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,4,128,1,float16,float16,0,0.07178666690985362
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,2,128,1,fp8,fp8,0,0.07489066819349925
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,4,128,1,fp8,fp8,0,0.07538666824499766
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,4,128,1,float16,fp8,0,0.07218133409818013
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,8,128,1,float16,float16,0,0.07298666735490163
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,8,128,1,fp8,fp8,0,0.07712533573309581
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,40,8,128,1,float16,fp8,0,0.07394666473070781
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,1,128,1,float16,float16,0,0.04102933406829834
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,40,128,1,float16,fp8,0,0.043375998735427856
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,40,128,1,float16,float16,0,0.04428799947102865
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,1,128,1,fp8,fp8,0,0.04236799975236257
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,1,128,1,float16,fp8,0,0.04099733382463455
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,40,128,1,fp8,fp8,0,0.049957334995269775
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,2,128,1,float16,float16,0,0.0407679999868075
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,2,128,1,float16,fp8,0,0.041349334021409355
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,2,128,1,fp8,fp8,0,0.04288533329963684
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,4,128,1,float16,float16,0,0.041738669077555336
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,4,128,1,float16,fp8,0,0.04205866654713949
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,8,128,1,float16,fp8,0,0.042725334564844765
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,8,128,1,float16,float16,0,0.042064001162846885
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,4,128,1,fp8,fp8,0,0.044954667488733925
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,40,8,128,1,fp8,fp8,0,0.0462773342927297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,40,128,1,float16,float16,0,0.028575999041398365
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,40,128,1,float16,fp8,0,0.029477333029111225
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,40,128,1,fp8,fp8,0,0.03242133309443792
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,1,128,1,float16,float16,0,0.028234665592511494
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,1,128,1,float16,fp8,0,0.02844800055027008
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,1,128,1,fp8,fp8,0,0.02979733298222224
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,2,128,1,float16,fp8,0,0.028607999285062153
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,2,128,1,float16,float16,0,0.028543998797734577
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,4,128,1,float16,float16,0,0.028512001037597656
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,2,128,1,fp8,fp8,0,0.029829333225886028
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,4,128,1,fp8,fp8,0,0.030415999392668407
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,4,128,1,float16,fp8,0,0.028991999725500744
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,8,128,1,float16,fp8,0,0.029552000264326733
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,8,128,1,float16,float16,0,0.02889599899450938
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,40,8,128,1,fp8,fp8,0,0.03202133377393087
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,40,128,1,float16,fp8,0,0.022778667509555817
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,40,128,1,fp8,fp8,0,0.024085332949956257
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,1,128,1,float16,float16,0,0.022143999735514324
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,40,128,1,float16,float16,0,0.022613334159056347
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,1,128,1,float16,fp8,0,0.021984001000722248
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,2,128,1,float16,float16,0,0.021930667261282604
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,1,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,2,128,1,float16,fp8,0,0.02199999988079071
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,2,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,4,128,1,float16,float16,0,0.022106667359670002
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,4,128,1,float16,fp8,0,0.022272000710169475
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,8,128,1,float16,float16,0,0.02186133215824763
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,8,128,1,float16,fp8,0,0.02231466770172119
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,4,128,1,fp8,fp8,0,0.023936000963052113
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,40,8,128,1,fp8,fp8,0,0.023957334458827972
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,40,128,1,float16,float16,0,0.016586666305859882
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,40,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,40,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,1,128,1,float16,float16,0,0.016106666376193363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,1,128,1,float16,fp8,0,0.0164533331990242
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,1,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,2,128,1,float16,fp8,0,0.016384000579516094
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,2,128,1,float16,float16,0,0.01621866722901662
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,2,128,1,fp8,fp8,0,0.017509333789348602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,4,128,1,float16,float16,0,0.0161013330022494
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,4,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,4,128,1,fp8,fp8,0,0.017642666896184284
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,8,128,1,float16,float16,0,0.01624533285697301
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,8,128,1,float16,fp8,0,0.016629333297411602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,40,8,128,1,fp8,fp8,0,0.017808000246683758
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,40,128,1,float16,float16,0,0.015696000307798386
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,40,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,1,128,1,float16,float16,0,0.01570133368174235
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,40,128,1,float16,fp8,0,0.016117333124081295
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,1,128,1,fp8,fp8,0,0.016575999557971954
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,2,128,1,float16,fp8,0,0.01597333326935768
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,2,128,1,float16,float16,0,0.01551466683546702
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,2,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,1,128,1,float16,fp8,0,0.016149333367745083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,4,128,1,float16,float16,0,0.015509333461523056
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,4,128,1,float16,fp8,0,0.016176000237464905
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,8,128,1,float16,fp8,0,0.016069332758585613
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,8,128,1,float16,float16,0,0.015450666348139444
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,8,128,1,fp8,fp8,0,0.016762666404247284
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,40,4,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,1,128,1,float16,float16,0,0.31751465797424316
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,1,128,1,float16,fp8,0,0.31833599011103314
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,2,128,1,float16,float16,0,0.32182933886845905
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,2,128,1,float16,fp8,0,0.3213119904200236
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,2,128,1,fp8,fp8,0,0.3775999943415324
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,1,128,1,fp8,fp8,0,0.375274658203125
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,4,128,1,float16,fp8,0,0.3264159957567851
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,4,128,1,float16,float16,0,0.3266826669375102
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,4,128,1,fp8,fp8,0,0.38094401359558105
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,1,128,1,float16,float16,0,0.16648000478744507
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,8,128,1,float16,float16,0,0.3323520024617513
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,8,128,1,float16,fp8,0,0.33222933610280353
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,40,8,128,1,fp8,fp8,0,0.3838506539662679
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,40,128,1,float16,float16,0,0.18410666783650717
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,40,128,1,float16,fp8,0,0.18012267351150513
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,40,128,1,fp8,fp8,0,0.22075200080871582
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,1,128,1,float16,fp8,0,0.16693333784739176
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,1,128,1,fp8,fp8,0,0.2001333236694336
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,2,128,1,float16,fp8,0,0.16932799418767294
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,2,128,1,fp8,fp8,0,0.19904534022013345
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,2,128,1,float16,float16,0,0.16785067319869995
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,4,128,1,float16,float16,0,0.1704639991124471
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,4,128,1,float16,fp8,0,0.17018665870030722
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,4,128,1,fp8,fp8,0,0.20273600021998087
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,8,128,1,float16,float16,0,0.1723733345667521
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,8,128,1,float16,fp8,0,0.1724053422609965
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,1,128,1,float16,float16,0,0.08898133039474487
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,40,8,128,1,fp8,fp8,0,0.20296533902486166
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,1,128,1,float16,fp8,0,0.0893440047899882
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,40,128,1,float16,float16,0,0.10084799925486247
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,40,128,1,fp8,fp8,0,0.12075733145078023
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,1,128,1,fp8,fp8,0,0.10831466317176819
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,40,128,1,float16,fp8,0,0.09888000289599101
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,2,128,1,float16,float16,0,0.08872532844543457
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,2,128,1,float16,fp8,0,0.08923199772834778
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,2,128,1,fp8,fp8,0,0.10876799623171489
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,4,128,1,float16,float16,0,0.09016000231107076
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,4,128,1,fp8,fp8,0,0.11000000437100728
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,4,128,1,float16,fp8,0,0.09052800138791402
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,8,128,1,float16,float16,0,0.0925333301226298
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,8,128,1,float16,fp8,0,0.09296000003814697
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,40,8,128,1,fp8,fp8,0,0.11241066455841064
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,40,128,1,float16,float16,0,0.0537120004494985
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,1,128,1,float16,float16,0,0.04931733508904775
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,40,128,1,float16,fp8,0,0.05329066514968872
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,1,128,1,float16,fp8,0,0.04997866849104563
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,40,128,1,fp8,fp8,0,0.06708799799283345
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,1,128,1,fp8,fp8,0,0.060005332032839455
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,2,128,1,float16,fp8,0,0.05002133548259735
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,4,128,1,float16,float16,0,0.05095999936262766
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,2,128,1,fp8,fp8,0,0.060218666990598045
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,4,128,1,float16,fp8,0,0.0510506679614385
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,2,128,1,float16,float16,0,0.050000001986821495
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,4,128,1,fp8,fp8,0,0.06214933097362518
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,8,128,1,float16,float16,0,0.051072001457214355
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,8,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,40,8,128,1,fp8,fp8,0,0.06365866462389629
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,40,128,1,float16,float16,0,0.0321066677570343
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,40,128,1,float16,fp8,0,0.03199466566244761
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,40,128,1,fp8,fp8,0,0.03959999978542328
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,1,128,1,float16,float16,0,0.031541332602500916
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,1,128,1,float16,fp8,0,0.031583999594052635
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,1,128,1,fp8,fp8,0,0.036789332826932274
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,2,128,1,float16,float16,0,0.03141866624355316
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,2,128,1,float16,fp8,0,0.03153600047032038
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,2,128,1,fp8,fp8,0,0.036501333117485046
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,4,128,1,float16,fp8,0,0.03193599979082743
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,4,128,1,float16,float16,0,0.0315786674618721
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,8,128,1,float16,float16,0,0.03197866678237915
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,8,128,1,fp8,fp8,0,0.039221333960692085
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,4,128,1,fp8,fp8,0,0.037690666814645134
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,40,8,128,1,float16,fp8,0,0.03282133241494497
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,40,128,1,float16,float16,0,0.023237332701683044
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,40,128,1,float16,fp8,0,0.023520000278949738
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,40,128,1,fp8,fp8,0,0.027450665831565857
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,1,128,1,float16,fp8,0,0.023178666830062866
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,1,128,1,float16,float16,0,0.02327466756105423
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,1,128,1,fp8,fp8,0,0.02666666607062022
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,2,128,1,float16,float16,0,0.023621333142121632
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,2,128,1,fp8,fp8,0,0.026554666459560394
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,4,128,1,float16,float16,0,0.022848000129063923
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,4,128,1,fp8,fp8,0,0.02719466636578242
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,4,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,8,128,1,float16,fp8,0,0.023631999890009563
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,8,128,1,float16,float16,0,0.02363733450571696
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,8,128,1,fp8,fp8,0,0.027999999622503918
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,40,2,128,1,float16,fp8,0,0.02345066765944163
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,1,128,1,float16,float16,0,0.018112000077962875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,40,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,1,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,1,128,1,float16,fp8,0,0.01801066721479098
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,40,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,2,128,1,float16,float16,0,0.0183146670460701
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,40,128,1,float16,float16,0,0.018757333358128864
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,2,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,4,128,1,float16,float16,0,0.01848000039656957
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,4,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,2,128,1,float16,fp8,0,0.018053332964579265
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,4,128,1,fp8,fp8,0,0.021557333568731945
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,8,128,1,float16,float16,0,0.01854933301607768
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,8,128,1,float16,fp8,0,0.018954666952292126
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,40,128,1,float16,float16,0,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,40,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,40,128,1,fp8,fp8,0,0.017535999417304993
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,1,128,1,float16,fp8,0,0.015568000574906668
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,1,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,2,128,1,float16,float16,0,0.014917333920796713
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,40,8,128,1,fp8,fp8,0,0.021503999829292297
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,2,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,2,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,4,128,1,float16,float16,0,0.014864000181357065
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,1,128,1,float16,float16,0,0.014896000425020853
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,4,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,8,128,1,float16,float16,0,0.014949332922697067
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,4,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,8,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,40,128,1,float16,fp8,0,0.014789332946141561
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,40,128,1,float16,float16,0,0.014138666292031607
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,40,8,128,1,float16,fp8,0,0.015674666812022526
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,1,128,1,float16,float16,0,0.014240000396966934
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,1,128,1,float16,fp8,0,0.014805333067973455
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,1,128,1,fp8,fp8,0,0.016250666230916977
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,40,128,1,fp8,fp8,0,0.016309333344300587
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,2,128,1,float16,float16,0,0.014554666976133982
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,2,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,2,128,1,fp8,fp8,0,0.016255999604860943
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,4,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,4,128,1,fp8,fp8,0,0.016122666498025257
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,8,128,1,float16,float16,0,0.014208000153303146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,4,128,1,float16,float16,0,0.014266667266686758
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,8,128,1,fp8,fp8,0,0.0164533331990242
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,40,8,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,1,128,1,float16,float16,0,0.27406932910283405
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,1,128,1,float16,fp8,0,0.2733973264694214
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,1,128,1,fp8,fp8,0,0.32608532905578613
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,2,128,1,float16,fp8,0,0.2741386691729228
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,2,128,1,float16,float16,0,0.27409066756566364
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,4,128,1,float16,float16,0,0.2752479910850525
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,2,128,1,fp8,fp8,0,0.32705599069595337
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,4,128,1,fp8,fp8,0,0.3283573389053345
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,8,128,1,float16,float16,0,0.2757973273595174
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,4,128,1,float16,fp8,0,0.2757546703020732
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,8,128,1,float16,fp8,0,0.2764586607615153
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,40,8,128,1,fp8,fp8,0,0.33052267630894977
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,40,128,1,float16,float16,0,0.1411893367767334
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,40,128,1,float16,fp8,0,0.13934933145840964
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,1,128,1,float16,float16,0,0.14222400387128195
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,40,128,1,fp8,fp8,0,0.18283732732137045
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,1,128,1,float16,fp8,0,0.14251200358072916
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,2,128,1,float16,float16,0,0.14301333824793497
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,2,128,1,float16,fp8,0,0.14230400323867798
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,4,128,1,float16,float16,0,0.1444906691710154
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,1,128,1,fp8,fp8,0,0.17068266868591309
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,2,128,1,fp8,fp8,0,0.17182934284210205
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,4,128,1,float16,fp8,0,0.14476799964904785
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,8,128,1,float16,float16,0,0.14493866761525473
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,8,128,1,float16,fp8,0,0.14537066221237183
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,4,128,1,fp8,fp8,0,0.17406400044759116
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,40,8,128,1,fp8,fp8,0,0.17545066277186075
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,40,128,1,float16,float16,0,0.07486400008201599
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,40,128,1,fp8,fp8,0,0.0997920036315918
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,40,128,1,float16,fp8,0,0.07334400216738383
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,1,128,1,float16,float16,0,0.07740800082683563
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,1,128,1,float16,fp8,0,0.0776693324247996
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,1,128,1,fp8,fp8,0,0.0941493312517802
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,2,128,1,float16,fp8,0,0.07673066854476929
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,2,128,1,fp8,fp8,0,0.09410666426022847
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,4,128,1,float16,float16,0,0.07828799883524577
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,2,128,1,float16,float16,0,0.07734400033950806
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,4,128,1,fp8,fp8,0,0.0965173343817393
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,4,128,1,float16,fp8,0,0.07819200058778127
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,8,128,1,float16,float16,0,0.07794133325417836
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,8,128,1,float16,fp8,0,0.0796853353579839
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,40,128,1,float16,float16,0,0.04318400224049886
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,40,8,128,1,fp8,fp8,0,0.097680002450943
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,40,128,1,float16,fp8,0,0.04331733286380768
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,1,128,1,float16,float16,0,0.04595733185609182
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,40,128,1,fp8,fp8,0,0.05611733098824819
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,1,128,1,fp8,fp8,0,0.054133335749308266
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,2,128,1,float16,float16,0,0.0462773342927297
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,2,128,1,float16,fp8,0,0.046426668763160706
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,2,128,1,fp8,fp8,0,0.05393599967161814
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,4,128,1,float16,float16,0,0.046336000164349876
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,1,128,1,float16,fp8,0,0.04554133117198944
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,4,128,1,float16,fp8,0,0.046581332882245384
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,8,128,1,float16,fp8,0,0.046613335609436035
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,8,128,1,float16,float16,0,0.04595733185609182
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,8,128,1,fp8,fp8,0,0.056799997886021934
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,40,128,1,float16,float16,0,0.028160000840822857
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,40,4,128,1,fp8,fp8,0,0.05515733361244202
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,40,128,1,float16,fp8,0,0.028704000016053517
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,1,128,1,float16,float16,0,0.028832000990708668
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,40,128,1,fp8,fp8,0,0.03525333354870478
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,1,128,1,float16,fp8,0,0.02881066749493281
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,1,128,1,fp8,fp8,0,0.034128000338872276
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,2,128,1,float16,float16,0,0.028912000358104706
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,2,128,1,float16,fp8,0,0.028629332780838013
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,4,128,1,float16,float16,0,0.02882133424282074
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,4,128,1,float16,fp8,0,0.029258665939172108
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,2,128,1,fp8,fp8,0,0.03381866713364919
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,8,128,1,float16,float16,0,0.029018667836983997
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,8,128,1,float16,fp8,0,0.029125332832336426
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,8,128,1,fp8,fp8,0,0.03484266748030981
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,40,4,128,1,fp8,fp8,0,0.03419200082619985
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,40,128,1,float16,float16,0,0.021040000021457672
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,40,128,1,fp8,fp8,0,0.025087999800841015
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,40,128,1,float16,fp8,0,0.02145066608985265
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,1,128,1,float16,fp8,0,0.02080533280968666
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,1,128,1,fp8,fp8,0,0.024853333830833435
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,1,128,1,float16,float16,0,0.021055998901526134
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,2,128,1,float16,float16,0,0.021141332884629566
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,2,128,1,fp8,fp8,0,0.02496533344189326
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,4,128,1,float16,float16,0,0.020725333442290623
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,4,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,2,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,4,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,8,128,1,float16,float16,0,0.020597333709398907
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,40,128,1,float16,float16,0,0.017743999759356182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,40,128,1,float16,fp8,0,0.018309333672126133
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,8,128,1,fp8,fp8,0,0.025600001215934753
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,40,128,1,fp8,fp8,0,0.020874666670958202
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,40,8,128,1,float16,fp8,0,0.02089066555102666
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,1,128,1,fp8,fp8,0,0.02062400057911873
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,1,128,1,float16,fp8,0,0.017727999637524288
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,2,128,1,float16,fp8,0,0.01806933308641116
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,2,128,1,float16,float16,0,0.01747200017174085
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,1,128,1,float16,float16,0,0.017525333911180496
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,4,128,1,float16,float16,0,0.017935999979575474
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,2,128,1,fp8,fp8,0,0.020794666061798733
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,4,128,1,fp8,fp8,0,0.020997333029905956
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,4,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,8,128,1,float16,fp8,0,0.018165333817402523
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,8,128,1,float16,float16,0,0.017946666727463405
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,40,8,128,1,fp8,fp8,0,0.02067733307679494
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,40,128,1,float16,fp8,0,0.014752000570297241
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,40,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,40,128,1,float16,float16,0,0.014469332993030548
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,1,128,1,float16,float16,0,0.014005333185195923
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,1,128,1,fp8,fp8,0,0.016778666526079178
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,2,128,1,float16,fp8,0,0.01471466695268949
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,2,128,1,float16,float16,0,0.014208000153303146
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,2,128,1,fp8,fp8,0,0.01664000004529953
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,1,128,1,float16,fp8,0,0.01469333345691363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,4,128,1,float16,float16,0,0.014314666390419006
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,4,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,8,128,1,float16,float16,0,0.014709333578745524
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,8,128,1,float16,fp8,0,0.014730667074521383
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,8,128,1,fp8,fp8,0,0.016757333030303318
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,40,128,1,float16,float16,0,0.013909333695967993
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,40,4,128,1,float16,fp8,0,0.014826666563749313
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,1,128,1,float16,float16,0,0.01394133393963178
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,1,128,1,float16,fp8,0,0.014730667074521383
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,40,128,1,float16,fp8,0,0.014538666854302088
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,1,128,1,fp8,fp8,0,0.016074666132529575
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,2,128,1,float16,float16,0,0.014021333307027817
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,40,128,1,fp8,fp8,0,0.016271999726692837
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,2,128,1,float16,fp8,0,0.014576000471909841
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,4,128,1,float16,float16,0,0.01393066719174385
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,4,128,1,float16,fp8,0,0.014677333335081736
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,8,128,1,float16,float16,0,0.013893333574136099
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,2,128,1,fp8,fp8,0,0.016384000579516094
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,8,128,1,float16,fp8,0,0.014725333700577417
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,4,128,1,fp8,fp8,0,0.016122666498025257
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,40,8,128,1,fp8,fp8,0,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,1,128,1,float16,float16,0,32.97238413492838
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,1,128,1,fp8,fp8,0,21.219839731852215
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,1,128,1,float16,fp8,0,32.170544942220054
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,2,128,1,fp8,fp8,0,21.33184560139974
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,2,128,1,float16,float16,0,33.29412841796875
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,2,128,1,float16,fp8,0,32.7127939860026
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,4,128,1,float16,fp8,0,32.87822469075521
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,4,128,1,float16,float16,0,33.23435719807943
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,4,128,1,fp8,fp8,0,21.588544209798176
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,1,128,1,float16,float16,0,16.779579162597656
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,8,128,1,fp8,fp8,0,21.514793395996094
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,8,128,1,float16,float16,0,32.90864054361979
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,32,8,128,1,float16,fp8,0,32.43947092692057
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,32,128,1,float16,fp8,0,16.184730529785156
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,32,128,1,float16,float16,0,16.396544138590496
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,1,128,1,fp8,fp8,0,10.85870361328125
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,32,128,1,fp8,fp8,0,10.861141204833984
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,1,128,1,float16,fp8,0,16.450623830159504
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,2,128,1,float16,float16,0,16.57647450764974
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,2,128,1,fp8,fp8,0,10.642565409342447
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,2,128,1,float16,fp8,0,16.42425537109375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,4,128,1,float16,fp8,0,16.698741912841797
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,4,128,1,float16,float16,0,16.497124989827473
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,4,128,1,fp8,fp8,0,10.867205301920572
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,8,128,1,float16,fp8,0,16.609242757161457
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,1,128,1,float16,float16,0,8.231381098429361
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,8,128,1,float16,float16,0,16.802837371826172
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,1,128,1,float16,fp8,0,8.292293548583984
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,32,8,128,1,fp8,fp8,0,10.912223815917969
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,32,128,1,float16,fp8,0,8.365631739298502
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,32,128,1,float16,float16,0,8.140725453694662
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,32,128,1,fp8,fp8,0,5.444704055786133
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,1,128,1,fp8,fp8,0,5.225354512532552
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,2,128,1,float16,fp8,0,8.351130803426107
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,2,128,1,float16,float16,0,8.420149485270182
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,2,128,1,fp8,fp8,0,5.376389185587565
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,4,128,1,fp8,fp8,0,5.3727677663167315
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,4,128,1,float16,fp8,0,8.298159917195639
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,4,128,1,float16,float16,0,8.383589426676432
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,8,128,1,float16,float16,0,8.34227180480957
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,8,128,1,float16,fp8,0,8.402490615844727
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,32,8,128,1,fp8,fp8,0,5.4651947021484375
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,1,128,1,float16,float16,0,4.0938294728597
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,1,128,1,float16,fp8,0,4.102741241455078
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,32,128,1,float16,float16,0,4.042165438334147
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,32,128,1,float16,fp8,0,3.98421319325765
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,1,128,1,fp8,fp8,0,2.7762721379597983
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,2,128,1,float16,float16,0,4.190373420715332
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,32,128,1,fp8,fp8,0,2.70142396291097
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,2,128,1,float16,fp8,0,4.389072100321452
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,2,128,1,fp8,fp8,0,2.748384157816569
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,4,128,1,float16,float16,0,4.052330652872722
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,4,128,1,float16,fp8,0,4.038554509480794
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,4,128,1,fp8,fp8,0,2.7833067576090493
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,8,128,1,float16,float16,0,4.192960103352864
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,8,128,1,float16,fp8,0,4.099162737528483
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,32,8,128,1,fp8,fp8,0,2.755274772644043
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,1,128,1,fp8,fp8,0,12.610965728759766
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,1,128,1,float16,fp8,0,18.786773681640625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,1,128,1,float16,float16,0,19.329813639322918
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,2,128,1,fp8,fp8,0,12.735003153483072
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,2,128,1,float16,float16,0,19.116629282633465
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,2,128,1,float16,fp8,0,19.027231852213543
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,4,128,1,float16,fp8,0,18.64563242594401
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,4,128,1,float16,float16,0,19.253915150960285
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,1,128,1,float16,float16,0,9.64523188273112
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,4,128,1,fp8,fp8,0,12.603397369384766
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,8,128,1,float16,float16,0,18.926480611165363
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,8,128,1,float16,fp8,0,19.187520345052082
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,32,8,128,1,fp8,fp8,0,12.727765401204428
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,1,128,1,float16,fp8,0,9.602448145548502
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,1,128,1,fp8,fp8,0,6.256415685017903
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,32,128,1,float16,float16,0,9.435114542643229
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,32,128,1,fp8,fp8,0,6.1652266184488935
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,32,128,1,float16,fp8,0,9.333722432454428
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,2,128,1,float16,float16,0,9.641946792602539
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,2,128,1,float16,fp8,0,9.704218546549479
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,2,128,1,fp8,fp8,0,6.281637191772461
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,4,128,1,float16,float16,0,9.61628786722819
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,4,128,1,float16,fp8,0,9.447226842244467
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,4,128,1,fp8,fp8,0,6.1204376220703125
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,8,128,1,float16,float16,0,9.5926882425944
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,8,128,1,float16,fp8,0,9.525594711303711
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,1,128,1,float16,float16,0,4.550565401713054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,32,8,128,1,fp8,fp8,0,6.13206418355306
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,32,128,1,float16,float16,0,4.541962623596191
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,1,128,1,fp8,fp8,0,3.083866755167643
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,1,128,1,float16,fp8,0,4.706528027852376
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,32,128,1,float16,fp8,0,4.833423932393392
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,2,128,1,float16,float16,0,4.59333864847819
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,32,128,1,fp8,fp8,0,3.163519859313965
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,2,128,1,float16,fp8,0,4.738858540852864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,2,128,1,fp8,fp8,0,3.1204799016316733
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,4,128,1,float16,float16,0,4.760592142740886
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,4,128,1,float16,fp8,0,4.705615997314453
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,4,128,1,fp8,fp8,0,3.1088692347208657
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,8,128,1,float16,float16,0,4.607397397359212
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,1,128,1,float16,float16,0,2.39573335647583
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,8,128,1,fp8,fp8,0,3.179893175760905
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,32,8,128,1,float16,fp8,0,4.579034805297852
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,1,128,1,float16,fp8,0,2.3929120699564614
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,32,128,1,float16,float16,0,2.367488066355387
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,32,128,1,float16,fp8,0,2.3538452784220376
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,1,128,1,fp8,fp8,0,1.6684746742248535
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,32,128,1,fp8,fp8,0,1.6763946215311687
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,2,128,1,float16,float16,0,2.3976747194925943
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,2,128,1,float16,fp8,0,2.414789358774821
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,2,128,1,fp8,fp8,0,1.673701286315918
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,4,128,1,float16,float16,0,2.4212533632914224
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,4,128,1,float16,fp8,0,2.3908534049987793
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,4,128,1,fp8,fp8,0,1.6746880213419597
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,8,128,1,float16,float16,0,2.4263359705607095
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,8,128,1,float16,fp8,0,2.4182613690694175
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,32,8,128,1,fp8,fp8,0,1.6779146194458008
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,1,128,1,float16,float16,0,13.506304423014322
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,1,128,1,fp8,fp8,0,8.815343856811523
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,2,128,1,float16,float16,0,13.587936401367188
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,2,128,1,float16,fp8,0,13.479680379231771
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,2,128,1,fp8,fp8,0,9.008303960164389
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,1,128,1,float16,fp8,0,13.552959442138672
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,4,128,1,float16,float16,0,13.779983520507812
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,4,128,1,float16,fp8,0,13.287668863932291
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,1,128,1,float16,float16,0,6.789461135864258
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,4,128,1,fp8,fp8,0,9.026224136352539
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,32,128,1,float16,float16,0,6.8743947347005205
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,8,128,1,fp8,fp8,0,8.90881601969401
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,32,128,1,float16,fp8,0,6.743589401245117
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,8,128,1,float16,float16,0,13.66873550415039
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,1,128,1,float16,fp8,0,6.574341456095378
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,32,8,128,1,float16,fp8,0,13.572891235351562
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,32,128,1,fp8,fp8,0,4.500341415405273
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,1,128,1,fp8,fp8,0,4.443552017211914
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,2,128,1,float16,float16,0,6.83293342590332
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,2,128,1,float16,fp8,0,6.656954447428386
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,2,128,1,fp8,fp8,0,4.4697920481363935
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,4,128,1,float16,fp8,0,6.578917185465495
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,4,128,1,fp8,fp8,0,4.377674738566081
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,4,128,1,float16,float16,0,6.635434468587239
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,8,128,1,float16,float16,0,6.9668534596761065
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,8,128,1,float16,fp8,0,6.840415954589844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,1,128,1,float16,float16,0,3.260885238647461
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,1,128,1,float16,fp8,0,3.2406400044759116
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,1,128,1,fp8,fp8,0,2.2551679611206055
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,32,128,1,float16,float16,0,3.22763729095459
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,32,8,128,1,fp8,fp8,0,4.5507198969523115
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,32,128,1,float16,fp8,0,3.244213422139486
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,32,128,1,fp8,fp8,0,2.2556533813476562
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,2,128,1,float16,float16,0,3.415226618448893
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,2,128,1,float16,fp8,0,3.2674134572347007
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,2,128,1,fp8,fp8,0,2.2534772555033364
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,4,128,1,float16,float16,0,3.3028907775878906
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,4,128,1,float16,fp8,0,3.243040084838867
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,4,128,1,fp8,fp8,0,2.2578934033711753
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,8,128,1,float16,float16,0,3.38865598042806
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,8,128,1,float16,fp8,0,3.247882525126139
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,32,8,128,1,fp8,fp8,0,2.281797409057617
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,32,128,1,float16,float16,0,1.7170346577962239
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,32,128,1,float16,fp8,0,1.7156480153401692
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,1,128,1,float16,float16,0,1.7463733355204265
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,1,128,1,float16,fp8,0,1.7319040298461914
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,1,128,1,fp8,fp8,0,1.2296960353851318
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,2,128,1,float16,float16,0,1.741557280222575
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,2,128,1,float16,fp8,0,1.7453813552856445
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,32,128,1,fp8,fp8,0,1.238986651102702
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,2,128,1,fp8,fp8,0,1.2324639956156414
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,4,128,1,float16,float16,0,1.7563519477844238
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,4,128,1,float16,fp8,0,1.7314666112263997
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,4,128,1,fp8,fp8,0,1.2320000330607097
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,8,128,1,float16,float16,0,1.7655466397603352
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,8,128,1,float16,fp8,0,1.7263894081115723
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,32,8,128,1,fp8,fp8,0,1.243887980779012
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,1,128,1,float16,fp8,0,17.677210489908855
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,1,128,1,float16,float16,0,17.900000254313152
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,2,128,1,float16,float16,0,17.627914428710938
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,1,128,1,fp8,fp8,0,11.894159952799479
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,2,128,1,fp8,fp8,0,12.101306915283203
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,2,128,1,float16,fp8,0,17.744059244791668
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,4,128,1,float16,fp8,0,17.7064692179362
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,4,128,1,float16,float16,0,17.632923126220703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,1,128,1,float16,float16,0,8.848426818847656
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,4,128,1,fp8,fp8,0,11.995231628417969
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,1,128,1,float16,fp8,0,8.61195182800293
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,8,128,1,float16,fp8,0,17.795525868733723
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,8,128,1,float16,float16,0,18.090799967447918
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,32,128,1,float16,fp8,0,8.964234670003256
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,32,128,1,float16,float16,0,9.073429107666016
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,32,8,128,1,fp8,fp8,0,12.023450215657553
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,32,128,1,fp8,fp8,0,6.141120274861653
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,1,128,1,fp8,fp8,0,5.987509409586589
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,2,128,1,float16,float16,0,8.905893325805664
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,2,128,1,fp8,fp8,0,5.948879877726237
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,2,128,1,float16,fp8,0,8.735631942749023
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,4,128,1,float16,float16,0,9.088074366251627
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,4,128,1,float16,fp8,0,8.861146926879883
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,4,128,1,fp8,fp8,0,5.869834899902344
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,8,128,1,float16,float16,0,8.985066731770834
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,8,128,1,float16,fp8,0,9.012746810913086
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,1,128,1,float16,float16,0,4.4157759348551435
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,1,128,1,float16,fp8,0,4.351674715677897
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,32,8,128,1,fp8,fp8,0,5.80296516418457
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,1,128,1,fp8,fp8,0,2.9296960830688477
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,32,128,1,float16,float16,0,4.233498573303223
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,2,128,1,float16,float16,0,4.206501324971517
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,32,128,1,float16,fp8,0,4.174319903055827
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,2,128,1,float16,fp8,0,4.2890879313151045
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,2,128,1,fp8,fp8,0,2.913189252217611
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,32,128,1,fp8,fp8,0,2.99726931254069
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,4,128,1,float16,float16,0,4.2737172444661455
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,4,128,1,fp8,fp8,0,2.93505064646403
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,4,128,1,float16,fp8,0,4.423994700113933
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,8,128,1,float16,float16,0,4.229167938232422
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,8,128,1,float16,fp8,0,4.189882596333821
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,32,8,128,1,fp8,fp8,0,2.9558614095052085
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,1,128,1,float16,float16,0,2.2095093727111816
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,32,128,1,float16,float16,0,2.190015951792399
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,1,128,1,float16,fp8,0,2.196213404337565
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,1,128,1,fp8,fp8,0,1.5312906901041667
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,32,128,1,float16,fp8,0,2.172309398651123
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,2,128,1,float16,float16,0,2.1900426546732583
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,2,128,1,float16,fp8,0,2.182746728261312
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,32,128,1,fp8,fp8,0,1.555605411529541
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,2,128,1,fp8,fp8,0,1.5489385922749836
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,4,128,1,float16,float16,0,2.205397288004557
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,4,128,1,float16,fp8,0,2.1952640215555825
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,4,128,1,fp8,fp8,0,1.5420160293579102
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,8,128,1,float16,float16,0,2.2131946881612143
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,32,128,1,float16,float16,0,1.1686240037282307
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,8,128,1,fp8,fp8,0,1.5570665995279949
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,32,8,128,1,float16,fp8,0,2.1791253089904785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,1,128,1,float16,float16,0,1.1990079879760742
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,1,128,1,float16,fp8,0,1.173904021581014
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,32,128,1,float16,fp8,0,1.1787467002868652
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,1,128,1,fp8,fp8,0,0.8526720205942789
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,2,128,1,float16,float16,0,1.1877013047536213
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,2,128,1,float16,fp8,0,1.1820906798044841
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,2,128,1,fp8,fp8,0,0.8573226928710938
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,32,128,1,fp8,fp8,0,0.8631733258565267
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,4,128,1,float16,float16,0,1.203002691268921
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,4,128,1,float16,fp8,0,1.1773066520690918
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,4,128,1,fp8,fp8,0,0.8623999754587809
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,8,128,1,float16,float16,0,1.1975359916687012
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,8,128,1,float16,fp8,0,1.176047960917155
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,32,8,128,1,fp8,fp8,0,0.8636266390482584
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,1,128,1,float16,float16,0,10.609482447306315
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,1,128,1,fp8,fp8,0,7.106794357299805
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,1,128,1,float16,fp8,0,10.414837519327799
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,2,128,1,float16,float16,0,10.502202351888021
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,2,128,1,float16,fp8,0,10.475919723510742
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,2,128,1,fp8,fp8,0,7.276608149210612
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,4,128,1,float16,float16,0,10.406672159830729
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,4,128,1,float16,fp8,0,10.55131721496582
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,1,128,1,float16,float16,0,5.2209014892578125
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,4,128,1,fp8,fp8,0,7.11409060160319
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,8,128,1,float16,float16,0,10.603008270263672
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,8,128,1,fp8,fp8,0,7.135589599609375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,1,128,1,float16,fp8,0,5.051621437072754
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,32,8,128,1,float16,fp8,0,10.406517028808594
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,32,128,1,float16,float16,0,5.097322781880696
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,32,128,1,float16,fp8,0,4.977765401204427
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,32,128,1,fp8,fp8,0,3.5677706400553384
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,1,128,1,fp8,fp8,0,3.491759936014811
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,2,128,1,float16,fp8,0,5.041925430297852
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,2,128,1,fp8,fp8,0,3.4969654083251953
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,4,128,1,float16,float16,0,5.21674124399821
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,2,128,1,float16,float16,0,5.266010602315267
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,4,128,1,float16,fp8,0,5.029941240946452
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,4,128,1,fp8,fp8,0,3.484970728556315
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,8,128,1,float16,float16,0,4.935386657714844
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,8,128,1,float16,fp8,0,5.196981430053711
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,1,128,1,float16,float16,0,2.5382933616638184
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,1,128,1,float16,fp8,0,2.4946719805399575
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,1,128,1,fp8,fp8,0,1.779578685760498
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,32,8,128,1,fp8,fp8,0,3.57421875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,32,128,1,float16,float16,0,2.554863929748535
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,2,128,1,fp8,fp8,0,1.7873172760009766
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,2,128,1,float16,float16,0,2.5210506121317544
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,2,128,1,float16,fp8,0,2.5124692916870117
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,32,128,1,float16,fp8,0,2.5214293797810874
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,32,128,1,fp8,fp8,0,1.8778880437215169
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,4,128,1,float16,fp8,0,2.5141332944234214
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,4,128,1,float16,float16,0,2.5417493184407554
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,4,128,1,fp8,fp8,0,1.7983733812967937
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,8,128,1,float16,fp8,0,2.510826587677002
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,8,128,1,fp8,fp8,0,1.821381409962972
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,32,8,128,1,float16,float16,0,2.5465173721313477
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,32,128,1,float16,float16,0,1.3340586026509602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,32,128,1,float16,fp8,0,1.3347253799438477
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,1,128,1,float16,float16,0,1.3248586654663086
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,1,128,1,float16,fp8,0,1.318405310312907
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,1,128,1,fp8,fp8,0,0.9575466314951578
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,2,128,1,float16,float16,0,1.3414400418599446
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,2,128,1,fp8,fp8,0,0.9622453053792318
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,2,128,1,float16,fp8,0,1.3166293303171794
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,32,128,1,fp8,fp8,0,0.9849546750386556
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,4,128,1,float16,float16,0,1.3462293942769368
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,4,128,1,float16,fp8,0,1.3119626839955647
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,4,128,1,fp8,fp8,0,0.963818629582723
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,8,128,1,float16,float16,0,1.3380053838094075
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,8,128,1,float16,fp8,0,1.332741340001424
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,1,128,1,float16,float16,0,0.7388266722361246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,32,128,1,float16,float16,0,0.7342720031738281
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,1,128,1,float16,fp8,0,0.7306719621022543
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,32,128,1,float16,fp8,0,0.72597869237264
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,32,8,128,1,fp8,fp8,0,0.9756960074106852
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,32,128,1,fp8,fp8,0,0.5433919827143351
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,1,128,1,fp8,fp8,0,0.5198293526967367
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,2,128,1,float16,float16,0,0.744762659072876
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,2,128,1,float16,fp8,0,0.7278986771901449
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,4,128,1,float16,float16,0,0.744640032450358
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,2,128,1,fp8,fp8,0,0.5196319818496704
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,4,128,1,float16,fp8,0,0.7293813228607178
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,4,128,1,fp8,fp8,0,0.522160013516744
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,8,128,1,float16,float16,0,0.7479093074798584
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,8,128,1,float16,fp8,0,0.7332959969838461
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,32,8,128,1,fp8,fp8,0,0.5285653273264567
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,1,128,1,float16,float16,0,10.109055836995443
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,1,128,1,float16,fp8,0,10.213567733764648
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,2,128,1,float16,float16,0,10.441237131754557
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,2,128,1,float16,fp8,0,10.181941350301107
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,1,128,1,fp8,fp8,0,7.379546483357747
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,2,128,1,fp8,fp8,0,7.046506881713867
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,4,128,1,float16,float16,0,10.370442708333334
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,4,128,1,float16,fp8,0,10.207349141438803
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,1,128,1,float16,float16,0,4.999701182047526
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,4,128,1,fp8,fp8,0,7.052906672159831
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,1,128,1,float16,fp8,0,4.911375999450684
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,8,128,1,float16,float16,0,10.322426478068033
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,8,128,1,float16,fp8,0,10.231082916259766
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,32,8,128,1,fp8,fp8,0,7.221317291259766
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,32,128,1,float16,float16,0,5.159018516540527
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,32,128,1,float16,fp8,0,4.9825439453125
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,32,128,1,fp8,fp8,0,3.6883519490559897
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,1,128,1,fp8,fp8,0,3.4756641387939453
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,2,128,1,float16,fp8,0,4.7620798746744795
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,2,128,1,float16,float16,0,4.90777587890625
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,2,128,1,fp8,fp8,0,3.4898506800333657
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,4,128,1,float16,float16,0,5.076944033304851
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,4,128,1,fp8,fp8,0,3.542949358622233
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,4,128,1,float16,fp8,0,4.759365399678548
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,8,128,1,float16,float16,0,5.095343907674153
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,8,128,1,float16,fp8,0,4.81272538503011
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,1,128,1,float16,float16,0,2.4494239489237466
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,1,128,1,float16,fp8,0,2.3909974098205566
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,1,128,1,fp8,fp8,0,1.7725067138671875
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,32,8,128,1,fp8,fp8,0,3.592495918273926
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,32,128,1,float16,float16,0,2.5026453336079917
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,32,128,1,float16,fp8,0,2.4522506395975747
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,2,128,1,float16,float16,0,2.445290724436442
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,2,128,1,float16,fp8,0,2.4139199256896973
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,2,128,1,fp8,fp8,0,1.7728373209635417
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,32,128,1,fp8,fp8,0,1.8902400334676106
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,4,128,1,float16,float16,0,2.435978730519613
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,4,128,1,float16,fp8,0,2.417952060699463
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,4,128,1,fp8,fp8,0,1.7903946240743
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,8,128,1,float16,float16,0,2.4646506309509277
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,8,128,1,float16,fp8,0,2.403648058573405
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,1,128,1,float16,float16,0,1.2680906454722087
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,1,128,1,float16,fp8,0,1.2480800151824951
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,32,8,128,1,fp8,fp8,0,1.8167039553324382
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,32,128,1,float16,float16,0,1.2834986845652263
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,32,128,1,float16,fp8,0,1.2759573459625244
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,1,128,1,fp8,fp8,0,0.9290080070495605
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,2,128,1,float16,float16,0,1.2605973084767659
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,32,128,1,fp8,fp8,0,0.9920960267384847
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,2,128,1,float16,fp8,0,1.2501227060953777
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,2,128,1,fp8,fp8,0,0.9356640179951986
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,4,128,1,float16,float16,0,1.2769333521525066
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,4,128,1,float16,fp8,0,1.2483733495076497
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,4,128,1,fp8,fp8,0,0.940778652826945
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,8,128,1,float16,float16,0,1.2730027039845784
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,8,128,1,float16,fp8,0,1.2511893113454182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,32,8,128,1,fp8,fp8,0,0.9533066749572754
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,32,128,1,float16,float16,0,0.6927946408589681
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,1,128,1,float16,float16,0,0.6824320157368978
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,32,128,1,float16,fp8,0,0.6876586278279623
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,1,128,1,float16,fp8,0,0.6722666422526041
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,1,128,1,fp8,fp8,0,0.5128959814707438
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,32,128,1,fp8,fp8,0,0.5403840144475301
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,2,128,1,float16,fp8,0,0.6707306702931722
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,2,128,1,float16,float16,0,0.6835626761118571
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,2,128,1,fp8,fp8,0,0.5147786537806193
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,4,128,1,float16,float16,0,0.6850879987080892
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,4,128,1,float16,fp8,0,0.6771999994913737
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,4,128,1,fp8,fp8,0,0.5192746718724569
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,8,128,1,float16,float16,0,0.6888960202534994
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,8,128,1,float16,fp8,0,0.6807253360748291
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,1,128,1,float16,float16,0,0.39671464761098224
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,32,8,128,1,fp8,fp8,0,0.5236106713612875
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,32,128,1,float16,float16,0,0.39202133814493817
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,1,128,1,float16,fp8,0,0.38839467366536456
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,32,128,1,float16,fp8,0,0.39177600542704266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,32,128,1,fp8,fp8,0,0.2987520098686218
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,1,128,1,fp8,fp8,0,0.28515199820200604
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,2,128,1,float16,float16,0,0.39660799503326416
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,2,128,1,float16,fp8,0,0.3895893494288127
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,2,128,1,fp8,fp8,0,0.28563199440638226
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,4,128,1,float16,float16,0,0.3960213263829549
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,4,128,1,float16,fp8,0,0.389903982480367
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,4,128,1,fp8,fp8,0,0.28709866603215534
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,8,128,1,float16,float16,0,0.3976373275121053
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,8,128,1,float16,fp8,0,0.3909226655960083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,32,8,128,1,fp8,fp8,0,0.29151467482248944
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,1,128,1,float16,float16,0,6.2116851806640625
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,1,128,1,fp8,fp8,0,4.386336008707683
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,1,128,1,float16,fp8,0,6.091930389404297
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,2,128,1,float16,fp8,0,6.137765248616536
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,2,128,1,float16,float16,0,6.033034642537435
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,2,128,1,fp8,fp8,0,4.433658599853516
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,4,128,1,float16,float16,0,6.110240300496419
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,4,128,1,float16,fp8,0,6.124949137369792
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,1,128,1,float16,float16,0,2.9717280069986978
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,4,128,1,fp8,fp8,0,4.436383883158366
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,8,128,1,float16,fp8,0,5.996053059895833
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,8,128,1,float16,float16,0,6.167439778645833
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,32,8,128,1,fp8,fp8,0,4.551738739013672
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,1,128,1,float16,fp8,0,2.9147040049235025
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,32,128,1,float16,float16,0,3.0841973622639975
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,32,128,1,float16,fp8,0,3.0173705418904624
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,32,128,1,fp8,fp8,0,2.3864320119222007
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,1,128,1,fp8,fp8,0,2.2101386388142905
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,2,128,1,float16,float16,0,2.9519627888997397
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,2,128,1,fp8,fp8,0,2.217583974202474
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,2,128,1,float16,fp8,0,2.8811254501342773
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,4,128,1,float16,float16,0,2.9917707443237305
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,4,128,1,float16,fp8,0,2.913450558980306
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,4,128,1,fp8,fp8,0,2.233290672302246
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,8,128,1,float16,fp8,0,2.9555466969807944
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,8,128,1,float16,float16,0,2.9830506642659507
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,1,128,1,float16,float16,0,1.5081599553426106
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,1,128,1,float16,fp8,0,1.4777119954427083
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,32,8,128,1,fp8,fp8,0,2.2674880027770996
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,32,128,1,float16,float16,0,1.5660533905029297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,32,128,1,float16,fp8,0,1.527407964070638
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,1,128,1,fp8,fp8,0,1.1385119756062825
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,2,128,1,float16,float16,0,1.5090986887613933
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,2,128,1,float16,fp8,0,1.4828640619913738
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,32,128,1,fp8,fp8,0,1.2378613154093425
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,2,128,1,fp8,fp8,0,1.1374773184458415
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,4,128,1,float16,float16,0,1.5132160186767578
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,4,128,1,float16,fp8,0,1.4952960014343262
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,4,128,1,fp8,fp8,0,1.1456106503804524
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,8,128,1,float16,float16,0,1.5348745981852214
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,8,128,1,float16,fp8,0,1.4981973965962727
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,1,128,1,float16,float16,0,0.7956746419270834
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,32,8,128,1,fp8,fp8,0,1.1645546754201253
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,32,128,1,float16,float16,0,0.8179146448771158
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,1,128,1,float16,fp8,0,0.7774933179219564
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,1,128,1,fp8,fp8,0,0.6084853410720825
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,32,128,1,float16,fp8,0,0.8056693077087402
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,2,128,1,float16,float16,0,0.7937599817911783
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,2,128,1,float16,fp8,0,0.7816480000813802
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,32,128,1,fp8,fp8,0,0.655130664507548
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,2,128,1,fp8,fp8,0,0.6076266765594482
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,4,128,1,float16,float16,0,0.8008000055948893
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,4,128,1,float16,fp8,0,0.7818026542663574
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,4,128,1,fp8,fp8,0,0.6144959926605225
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,8,128,1,float16,float16,0,0.8020533720652262
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,8,128,1,float16,fp8,0,0.787775993347168
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,1,128,1,float16,float16,0,0.43769601980845135
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,32,8,128,1,fp8,fp8,0,0.6230133374532064
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,32,128,1,float16,float16,0,0.443178653717041
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,1,128,1,float16,fp8,0,0.43145068486531574
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,32,128,1,float16,fp8,0,0.4395306507746379
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,1,128,1,fp8,fp8,0,0.32499732573827106
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,32,128,1,fp8,fp8,0,0.360426664352417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,2,128,1,float16,float16,0,0.4389333327611287
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,2,128,1,float16,fp8,0,0.429200013478597
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,4,128,1,float16,float16,0,0.4411146640777588
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,4,128,1,float16,fp8,0,0.43438398838043213
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,2,128,1,fp8,fp8,0,0.3274293343226115
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,4,128,1,fp8,fp8,0,0.3301226695378621
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,8,128,1,float16,float16,0,0.4415359894434611
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,8,128,1,float16,fp8,0,0.4357066551844279
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,32,8,128,1,fp8,fp8,0,0.3322719931602478
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,32,128,1,float16,float16,0,0.24086399873097739
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,1,128,1,float16,float16,0,0.23567465941111246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,1,128,1,float16,fp8,0,0.23028266429901123
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,32,128,1,fp8,fp8,0,0.2039146622021993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,32,128,1,float16,fp8,0,0.23789334297180176
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,1,128,1,fp8,fp8,0,0.19100266695022583
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,2,128,1,float16,float16,0,0.23611734310785928
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,2,128,1,float16,fp8,0,0.22899732987085977
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,2,128,1,fp8,fp8,0,0.19136534134546915
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,4,128,1,float16,float16,0,0.23706666628519693
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,4,128,1,float16,fp8,0,0.23046932617823282
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,4,128,1,fp8,fp8,0,0.19389333327611288
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,8,128,1,float16,float16,0,0.23715200026830038
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,8,128,1,float16,fp8,0,0.23122666279474893
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,32,8,128,1,fp8,fp8,0,0.19637332359949747
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,1,128,1,float16,float16,0,6.287098566691081
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,1,128,1,float16,fp8,0,6.101157506306966
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,1,128,1,fp8,fp8,0,4.793909390767415
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,2,128,1,fp8,fp8,0,4.8206132253011065
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,2,128,1,float16,fp8,0,6.0804799397786455
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,2,128,1,float16,float16,0,6.329349517822266
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,4,128,1,float16,float16,0,6.3691253662109375
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,4,128,1,float16,fp8,0,6.06390380859375
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,1,128,1,float16,float16,0,3.091013272603353
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,4,128,1,fp8,fp8,0,4.869621276855469
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,1,128,1,float16,fp8,0,2.9988425572713218
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,8,128,1,fp8,fp8,0,4.928965250651042
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,8,128,1,float16,float16,0,6.275093078613281
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,32,128,1,float16,float16,0,3.2768214543660483
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,32,8,128,1,float16,fp8,0,6.233695983886719
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,32,128,1,float16,fp8,0,3.170389175415039
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,32,128,1,fp8,fp8,0,2.6549866994222007
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,1,128,1,fp8,fp8,0,2.4057440757751465
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,2,128,1,float16,float16,0,3.1034345626831055
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,2,128,1,float16,fp8,0,2.986853281656901
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,2,128,1,fp8,fp8,0,2.4135519663492837
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,4,128,1,float16,float16,0,3.134160041809082
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,4,128,1,float16,fp8,0,3.0220371882120767
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,4,128,1,fp8,fp8,0,2.4380319913228354
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,8,128,1,float16,float16,0,3.1433385213216147
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,8,128,1,float16,fp8,0,3.0647732416788735
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,1,128,1,float16,float16,0,1.5453920364379883
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,1,128,1,float16,fp8,0,1.5045706431070964
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,1,128,1,fp8,fp8,0,1.212821324666341
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,32,8,128,1,fp8,fp8,0,2.483450730641683
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,32,128,1,float16,float16,0,1.6203680038452148
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,32,128,1,float16,fp8,0,1.5904960632324219
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,2,128,1,float16,float16,0,1.54147736231486
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,2,128,1,float16,fp8,0,1.5135307312011719
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,2,128,1,fp8,fp8,0,1.2201120058695476
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,32,128,1,fp8,fp8,0,1.3560585975646973
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,4,128,1,fp8,fp8,0,1.2281546592712402
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,4,128,1,float16,fp8,0,1.5269227027893066
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,4,128,1,float16,float16,0,1.5529920260111492
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,8,128,1,float16,float16,0,1.560538609822591
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,8,128,1,fp8,fp8,0,1.2525280316670735
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,32,8,128,1,float16,fp8,0,1.5335893630981445
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,1,128,1,float16,float16,0,0.8025866349538168
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,32,128,1,float16,fp8,0,0.8332106272379557
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,32,128,1,float16,float16,0,0.836575984954834
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,1,128,1,float16,fp8,0,0.780623992284139
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,1,128,1,fp8,fp8,0,0.6347999970118204
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,2,128,1,float16,float16,0,0.8041600386301676
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,2,128,1,float16,fp8,0,0.7826080322265625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,32,128,1,fp8,fp8,0,0.7043840090433756
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,2,128,1,fp8,fp8,0,0.6403359969456991
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,4,128,1,float16,float16,0,0.8100213209788004
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,4,128,1,float16,fp8,0,0.7891733646392822
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,4,128,1,fp8,fp8,0,0.6424746513366699
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,8,128,1,float16,float16,0,0.8120693365732828
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,1,128,1,float16,float16,0,0.43237332503000897
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,8,128,1,float16,fp8,0,0.7989652951558431
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,32,128,1,float16,float16,0,0.4448639949162801
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,32,8,128,1,fp8,fp8,0,0.6546880006790161
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,32,128,1,float16,fp8,0,0.4433600107828776
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,32,128,1,fp8,fp8,0,0.38504000504811603
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,1,128,1,fp8,fp8,0,0.34532801310221356
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,1,128,1,float16,fp8,0,0.41950400670369464
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,2,128,1,float16,float16,0,0.43383467197418213
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,2,128,1,fp8,fp8,0,0.34654398759206134
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,2,128,1,float16,fp8,0,0.4230080048243205
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,4,128,1,float16,fp8,0,0.4250986576080322
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,4,128,1,float16,float16,0,0.43408532937367755
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,4,128,1,fp8,fp8,0,0.3521546522776286
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,8,128,1,float16,float16,0,0.43623467286427814
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,8,128,1,float16,fp8,0,0.4273013273874919
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,32,8,128,1,fp8,fp8,0,0.3561973174413045
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,1,128,1,float16,float16,0,0.24364266792933145
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,1,128,1,float16,fp8,0,0.2392586668332418
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,1,128,1,fp8,fp8,0,0.18897600968678793
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,32,128,1,float16,fp8,0,0.2505066593488057
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,32,128,1,fp8,fp8,0,0.20930665731430054
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,32,128,1,float16,float16,0,0.25196800629297894
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,2,128,1,float16,fp8,0,0.23942933479944864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,2,128,1,fp8,fp8,0,0.18868800004323324
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,4,128,1,float16,float16,0,0.24788800875345865
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,2,128,1,float16,float16,0,0.24433066447575888
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,4,128,1,fp8,fp8,0,0.19106666247049967
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,4,128,1,float16,fp8,0,0.240282674630483
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,8,128,1,float16,fp8,0,0.2429813345273336
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,8,128,1,float16,float16,0,0.2494879961013794
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,32,8,128,1,fp8,fp8,0,0.19420266151428223
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,32,128,1,float16,float16,0,0.13886400063832602
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,1,128,1,float16,float16,0,0.1333440045515696
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,32,128,1,float16,fp8,0,0.13749333222707114
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,1,128,1,float16,fp8,0,0.12999999523162842
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,1,128,1,fp8,fp8,0,0.11132799585660298
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,2,128,1,float16,float16,0,0.13326399525006613
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,32,128,1,fp8,fp8,0,0.12459733088811238
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,2,128,1,float16,fp8,0,0.13025066256523132
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,2,128,1,fp8,fp8,0,0.1130400002002716
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,4,128,1,float16,float16,0,0.1341866652170817
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,4,128,1,float16,fp8,0,0.1316106617450714
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,8,128,1,float16,float16,0,0.13524267077445984
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,4,128,1,fp8,fp8,0,0.11482666929562886
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,8,128,1,float16,fp8,0,0.13246933619181314
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,32,8,128,1,fp8,fp8,0,0.11777599652608235
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,1,128,1,float16,float16,0,4.030661265055339
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,1,128,1,float16,fp8,0,3.869194666544596
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,1,128,1,fp8,fp8,0,3.2420266469319663
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,2,128,1,float16,float16,0,4.0356801350911455
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,2,128,1,float16,fp8,0,3.855152130126953
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,4,128,1,float16,float16,0,4.106213251749675
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,4,128,1,float16,fp8,0,3.983375867207845
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,2,128,1,fp8,fp8,0,3.2427520751953125
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,1,128,1,float16,float16,0,1.9766987164815266
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,4,128,1,fp8,fp8,0,3.274458567301432
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,8,128,1,float16,fp8,0,3.925429344177246
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,1,128,1,float16,fp8,0,1.91428804397583
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,8,128,1,float16,float16,0,4.085061391194661
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,32,128,1,float16,float16,0,2.12772798538208
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,32,8,128,1,fp8,fp8,0,3.333594640096029
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,1,128,1,fp8,fp8,0,1.6231892903645833
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,32,128,1,fp8,fp8,0,1.823530673980713
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,32,128,1,float16,fp8,0,2.0863946278889975
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,2,128,1,float16,fp8,0,1.9188693364461262
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,2,128,1,float16,float16,0,1.9869386355082195
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,2,128,1,fp8,fp8,0,1.6247679392496746
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,4,128,1,float16,float16,0,1.9947147369384766
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,4,128,1,float16,fp8,0,1.9434720675150554
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,4,128,1,fp8,fp8,0,1.644389311472575
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,8,128,1,float16,float16,0,2.020239988962809
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,1,128,1,float16,float16,0,1.0063306490580242
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,8,128,1,float16,fp8,0,1.9679306348164876
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,1,128,1,float16,fp8,0,0.9809333483378092
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,32,8,128,1,fp8,fp8,0,1.6728800137837727
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,32,128,1,float16,float16,0,1.0736640294392903
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,32,128,1,float16,fp8,0,1.0511893431345622
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,1,128,1,fp8,fp8,0,0.823311964670817
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,32,128,1,fp8,fp8,0,0.9326933224995931
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,2,128,1,float16,float16,0,1.011519988377889
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,2,128,1,float16,fp8,0,0.9825920263926188
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,2,128,1,fp8,fp8,0,0.8322506745656332
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,4,128,1,float16,float16,0,1.009663979212443
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,4,128,1,float16,fp8,0,0.987669308980306
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,4,128,1,fp8,fp8,0,0.8372320334116617
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,8,128,1,float16,fp8,0,0.9985439777374268
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,8,128,1,float16,float16,0,1.0204373200734456
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,1,128,1,float16,float16,0,0.5282080173492432
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,32,8,128,1,fp8,fp8,0,0.856282631556193
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,32,128,1,float16,float16,0,0.5583733320236206
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,32,128,1,float16,fp8,0,0.5498186747233073
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,1,128,1,float16,fp8,0,0.5141439835230509
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,32,128,1,fp8,fp8,0,0.49004268646240234
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,1,128,1,fp8,fp8,0,0.4361493190129598
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,2,128,1,fp8,fp8,0,0.440229336420695
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,2,128,1,float16,fp8,0,0.5162186622619629
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,2,128,1,float16,float16,0,0.5289066632588705
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,4,128,1,float16,float16,0,0.5330400069554647
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,4,128,1,float16,fp8,0,0.5181333223978678
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,4,128,1,fp8,fp8,0,0.4421279827753703
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,1,128,1,float16,float16,0,0.2884906729062398
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,8,128,1,float16,float16,0,0.5365386803944906
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,8,128,1,fp8,fp8,0,0.4514026641845703
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,1,128,1,float16,fp8,0,0.27990400791168213
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,32,8,128,1,float16,fp8,0,0.5244160095850626
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,32,128,1,fp8,fp8,0,0.27063467105229694
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,32,128,1,float16,fp8,0,0.29687466224034625
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,32,128,1,float16,float16,0,0.30215466022491455
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,1,128,1,fp8,fp8,0,0.2307466665903727
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,2,128,1,float16,float16,0,0.28807999690373737
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,2,128,1,float16,fp8,0,0.2828426758448283
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,2,128,1,fp8,fp8,0,0.23070400953292847
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,4,128,1,float16,float16,0,0.2902773420015971
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,4,128,1,float16,fp8,0,0.2836853265762329
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,4,128,1,fp8,fp8,0,0.23294933636983237
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,8,128,1,float16,float16,0,0.2938506603240967
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,8,128,1,float16,fp8,0,0.28677332401275635
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,32,8,128,1,fp8,fp8,0,0.23732799291610718
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,32,128,1,float16,float16,0,0.1681279937426249
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,32,128,1,fp8,fp8,0,0.14750933647155762
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,32,128,1,float16,fp8,0,0.16390933593114218
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,1,128,1,float16,float16,0,0.15601600209871927
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,1,128,1,float16,fp8,0,0.15125866731007895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,2,128,1,float16,float16,0,0.15728533267974854
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,1,128,1,fp8,fp8,0,0.13242133458455405
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,2,128,1,fp8,fp8,0,0.13333333532015482
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,2,128,1,float16,fp8,0,0.15219733119010925
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,4,128,1,float16,float16,0,0.15634666879971823
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,4,128,1,float16,fp8,0,0.1527786652247111
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,4,128,1,fp8,fp8,0,0.13457066814104715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,8,128,1,float16,float16,0,0.15818132956822714
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,8,128,1,fp8,fp8,0,0.13667733470598856
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,32,8,128,1,float16,fp8,0,0.1548426647981008
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,32,128,1,float16,float16,0,0.0992693305015564
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,1,128,1,float16,float16,0,0.09321600198745728
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,32,128,1,fp8,fp8,0,0.09090133508046468
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,32,128,1,float16,fp8,0,0.09676266709963481
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,1,128,1,fp8,fp8,0,0.0810346653064092
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,1,128,1,float16,fp8,0,0.0918880005677541
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,2,128,1,float16,float16,0,0.09361599882443745
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,2,128,1,float16,fp8,0,0.09206400314966838
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,2,128,1,fp8,fp8,0,0.0817440003156662
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,4,128,1,float16,float16,0,0.09337066610654195
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,4,128,1,float16,fp8,0,0.09184533357620239
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,4,128,1,fp8,fp8,0,0.08184533317883809
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,8,128,1,float16,float16,0,0.09406399726867676
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,8,128,1,fp8,fp8,0,0.08388800422350566
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,32,8,128,1,float16,fp8,0,0.09334933757781982
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,1,128,1,float16,fp8,0,3.7997865676879883
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,1,128,1,fp8,fp8,0,3.22651735941569
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,1,128,1,float16,float16,0,3.781818707784017
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,2,128,1,float16,float16,0,3.823274612426758
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,2,128,1,float16,fp8,0,3.8320639928181968
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,2,128,1,fp8,fp8,0,3.3346452713012695
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,4,128,1,float16,fp8,0,3.949157396952311
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,4,128,1,float16,float16,0,3.8609708150227866
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,1,128,1,float16,float16,0,1.8694507280985515
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,4,128,1,fp8,fp8,0,3.3690293629964194
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,8,128,1,float16,float16,0,3.977402687072754
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,8,128,1,float16,fp8,0,4.004560152689616
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,1,128,1,float16,fp8,0,1.8732959429423015
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,32,8,128,1,fp8,fp8,0,3.4984960556030273
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,1,128,1,fp8,fp8,0,1.60097074508667
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,32,128,1,float16,fp8,0,2.1669066747029624
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,32,128,1,fp8,fp8,0,1.892757256825765
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,32,128,1,float16,float16,0,2.204298655192057
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,2,128,1,float16,float16,0,1.8931679725646973
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,2,128,1,fp8,fp8,0,1.6531306902567546
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,4,128,1,float16,float16,0,1.9178880055745442
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,2,128,1,float16,fp8,0,1.9149492581685383
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,4,128,1,float16,fp8,0,1.9360052744547527
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,4,128,1,fp8,fp8,0,1.6887839635213215
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,8,128,1,float16,fp8,0,1.9950186411539714
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,1,128,1,float16,float16,0,0.9546186923980713
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,8,128,1,float16,float16,0,1.971018632253011
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,1,128,1,float16,fp8,0,0.9458719889322916
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,32,128,1,float16,float16,0,1.125386635462443
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,32,8,128,1,fp8,fp8,0,1.748304049173991
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,32,128,1,float16,fp8,0,1.0886346499125164
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,32,128,1,fp8,fp8,0,0.9510773022969564
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,1,128,1,fp8,fp8,0,0.7999200026194254
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,2,128,1,float16,float16,0,0.9660053253173828
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,2,128,1,float16,fp8,0,0.9667200247446696
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,2,128,1,fp8,fp8,0,0.832319974899292
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,4,128,1,float16,float16,0,0.9703359603881836
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,4,128,1,float16,fp8,0,0.9746293226877848
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,4,128,1,fp8,fp8,0,0.8488372961680094
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,8,128,1,float16,float16,0,0.9857493241628011
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,1,128,1,float16,float16,0,0.4873226483662923
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,8,128,1,float16,fp8,0,0.9902559916178385
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,32,8,128,1,fp8,fp8,0,0.8846346537272135
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,1,128,1,float16,fp8,0,0.4875466823577881
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,1,128,1,fp8,fp8,0,0.40856532255808514
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,32,128,1,float16,float16,0,0.5627253452936808
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,32,128,1,float16,fp8,0,0.5482613245646158
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,2,128,1,float16,float16,0,0.49223466714223224
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,32,128,1,fp8,fp8,0,0.4827413161595662
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,2,128,1,fp8,fp8,0,0.4288959900538127
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,2,128,1,float16,fp8,0,0.49396800994873047
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,4,128,1,float16,float16,0,0.4960266749064128
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,4,128,1,float16,fp8,0,0.5007040103276571
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,4,128,1,fp8,fp8,0,0.4357493321100871
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,8,128,1,float16,float16,0,0.5033119916915894
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,8,128,1,float16,fp8,0,0.5072426795959473
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,32,8,128,1,fp8,fp8,0,0.4522240161895752
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,32,128,1,float16,float16,0,0.2945546706517537
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,32,128,1,float16,fp8,0,0.2861386736234029
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,32,128,1,fp8,fp8,0,0.2505279978116353
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,1,128,1,float16,float16,0,0.2591093381245931
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,1,128,1,float16,fp8,0,0.25775466362635296
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,1,128,1,fp8,fp8,0,0.21466133991877237
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,2,128,1,float16,float16,0,0.25840532779693604
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,2,128,1,float16,fp8,0,0.2606933315594991
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,2,128,1,fp8,fp8,0,0.22418133417765299
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,4,128,1,float16,float16,0,0.2596106727917989
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,4,128,1,float16,fp8,0,0.2632906635602315
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,4,128,1,fp8,fp8,0,0.22485333681106567
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,8,128,1,float16,float16,0,0.2659200032552083
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,1,128,1,float16,float16,0,0.1402666668097178
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,8,128,1,fp8,fp8,0,0.2339413364728292
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,32,8,128,1,float16,fp8,0,0.26527466376622516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,32,128,1,float16,float16,0,0.16086933016777039
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,1,128,1,float16,fp8,0,0.140773336092631
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,32,128,1,float16,fp8,0,0.15784000356992087
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,32,128,1,fp8,fp8,0,0.12754666805267334
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,1,128,1,fp8,fp8,0,0.10904533664385478
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,2,128,1,float16,float16,0,0.14105066657066345
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,2,128,1,float16,fp8,0,0.14126933614412943
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,2,128,1,fp8,fp8,0,0.11125866572062175
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,4,128,1,float16,float16,0,0.14297067125638327
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,4,128,1,float16,fp8,0,0.14378666877746582
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,4,128,1,fp8,fp8,0,0.11356266339619954
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,8,128,1,float16,float16,0,0.14485333363215128
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,8,128,1,float16,fp8,0,0.1459999978542328
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,32,8,128,1,fp8,fp8,0,0.1165013313293457
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,1,128,1,float16,float16,0,0.0740586668252945
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,1,128,1,float16,fp8,0,0.07347199817498525
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,32,128,1,float16,float16,0,0.0842186709245046
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,32,128,1,float16,fp8,0,0.08262399832407634
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,1,128,1,fp8,fp8,0,0.06016000111897787
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,32,128,1,fp8,fp8,0,0.0721919983625412
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,2,128,1,float16,float16,0,0.07376533250013988
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,4,128,1,float16,float16,0,0.07432533303896587
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,2,128,1,fp8,fp8,0,0.06182933350404104
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,2,128,1,float16,fp8,0,0.07310933371384938
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,4,128,1,float16,fp8,0,0.07482666770617168
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,4,128,1,fp8,fp8,0,0.06225599845250448
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,8,128,1,float16,float16,0,0.07632533212502797
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,8,128,1,float16,fp8,0,0.07685866455237071
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,32,8,128,1,fp8,fp8,0,0.06712000072002411
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,1,128,1,float16,float16,0,0.04238399863243103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,32,128,1,float16,float16,0,0.047093331813812256
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,1,128,1,float16,fp8,0,0.04252266883850098
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,32,128,1,fp8,fp8,0,0.04311466713746389
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,1,128,1,fp8,fp8,0,0.03615466753641764
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,2,128,1,float16,float16,0,0.042362665136655174
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,32,128,1,float16,fp8,0,0.046154667933781944
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,2,128,1,float16,fp8,0,0.04286933441956838
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,2,128,1,fp8,fp8,0,0.03623466690381368
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,4,128,1,float16,float16,0,0.04274133344491323
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,4,128,1,float16,fp8,0,0.04251199960708618
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,4,128,1,fp8,fp8,0,0.03756800045569738
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,8,128,1,float16,fp8,0,0.043322667479515076
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,8,128,1,fp8,fp8,0,0.03854399919509888
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,32,8,128,1,float16,float16,0,0.04304533203442892
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,1,128,1,float16,fp8,0,2.9190667470296225
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,1,128,1,fp8,fp8,0,2.6836907068888345
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,1,128,1,float16,float16,0,2.937199910481771
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,2,128,1,float16,float16,0,2.9798399607340493
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,2,128,1,float16,fp8,0,2.9629494349161782
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,2,128,1,fp8,fp8,0,2.764490763346354
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,4,128,1,float16,float16,0,3.0311733881632485
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,4,128,1,float16,fp8,0,3.0434560775756836
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,1,128,1,float16,float16,0,1.4530240694681804
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,4,128,1,fp8,fp8,0,2.820826530456543
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,1,128,1,float16,fp8,0,1.4525334040323894
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,8,128,1,float16,float16,0,3.1965014139811196
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,8,128,1,float16,fp8,0,3.163818677266439
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,32,8,128,1,fp8,fp8,0,2.947418530782064
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,32,128,1,float16,float16,0,1.818186601003011
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,32,128,1,float16,fp8,0,1.7709813117980957
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,32,128,1,fp8,fp8,0,1.628111998240153
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,1,128,1,fp8,fp8,0,1.3377280235290527
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,2,128,1,float16,float16,0,1.4766292572021484
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,2,128,1,float16,fp8,0,1.4914827346801758
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,2,128,1,fp8,fp8,0,1.3962559700012207
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,4,128,1,float16,float16,0,1.5040000279744465
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,4,128,1,float16,fp8,0,1.518778642018636
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,4,128,1,fp8,fp8,0,1.420805295308431
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,1,128,1,float16,float16,0,0.7386453151702881
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,1,128,1,float16,fp8,0,0.737231969833374
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,8,128,1,float16,fp8,0,1.5760159492492676
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,8,128,1,float16,float16,0,1.558517297108968
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,32,8,128,1,fp8,fp8,0,1.484373410542806
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,32,128,1,float16,float16,0,0.9147733052571615
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,1,128,1,fp8,fp8,0,0.664405345916748
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,2,128,1,float16,float16,0,0.7486026287078857
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,2,128,1,float16,fp8,0,0.7483306725819906
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,32,128,1,fp8,fp8,0,0.8145813147226969
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,2,128,1,fp8,fp8,0,0.6985867023468018
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,32,128,1,float16,fp8,0,0.891103982925415
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,4,128,1,float16,float16,0,0.7562879721323649
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,4,128,1,float16,fp8,0,0.7662239869435629
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,4,128,1,fp8,fp8,0,0.710640033086141
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,8,128,1,float16,float16,0,0.7724853356679281
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,8,128,1,float16,fp8,0,0.7822559674580892
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,32,128,1,float16,float16,0,0.4586613178253174
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,1,128,1,float16,float16,0,0.38173333803812665
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,1,128,1,float16,fp8,0,0.3789120117823283
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,32,8,128,1,fp8,fp8,0,0.7440266609191895
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,32,128,1,float16,fp8,0,0.44735467433929443
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,1,128,1,fp8,fp8,0,0.3391573429107666
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,2,128,1,float16,float16,0,0.3830133279164632
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,32,128,1,fp8,fp8,0,0.4142293135325114
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,2,128,1,float16,fp8,0,0.38654398918151855
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,2,128,1,fp8,fp8,0,0.35678398609161377
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,4,128,1,float16,float16,0,0.38733867804209393
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,4,128,1,float16,fp8,0,0.38896000385284424
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,4,128,1,fp8,fp8,0,0.3606239954630534
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,8,128,1,float16,float16,0,0.3973493178685506
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,1,128,1,float16,float16,0,0.2005386749903361
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,8,128,1,float16,fp8,0,0.3988800048828125
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,32,128,1,float16,float16,0,0.24119999011357626
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,32,128,1,float16,fp8,0,0.2352693279584249
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,32,128,1,fp8,fp8,0,0.21370132764180502
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,32,8,128,1,fp8,fp8,0,0.37836798032124835
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,1,128,1,fp8,fp8,0,0.1791306734085083
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,1,128,1,float16,fp8,0,0.20026665925979614
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,2,128,1,float16,float16,0,0.20074133078257242
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,2,128,1,float16,fp8,0,0.20198933283487955
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,2,128,1,fp8,fp8,0,0.18608532349268594
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,4,128,1,float16,float16,0,0.20570667584737143
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,4,128,1,float16,fp8,0,0.20504534244537354
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,4,128,1,fp8,fp8,0,0.18576000134150186
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,8,128,1,float16,float16,0,0.20804266134897867
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,8,128,1,fp8,fp8,0,0.19618666172027588
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,32,8,128,1,float16,fp8,0,0.21058666706085205
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,32,128,1,float16,float16,0,0.1313813328742981
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,1,128,1,float16,float16,0,0.10925867160161336
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,1,128,1,float16,fp8,0,0.10934933026631673
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,32,128,1,float16,fp8,0,0.12746133406956991
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,1,128,1,fp8,fp8,0,0.09160533547401428
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,32,128,1,fp8,fp8,0,0.11319466431935628
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,2,128,1,float16,float16,0,0.1092693308989207
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,2,128,1,fp8,fp8,0,0.09437333544095357
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,4,128,1,float16,float16,0,0.1113973359266917
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,4,128,1,float16,fp8,0,0.11146666606267293
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,4,128,1,fp8,fp8,0,0.09648000200589497
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,2,128,1,float16,fp8,0,0.11027733484903972
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,8,128,1,float16,fp8,0,0.11452266573905945
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,8,128,1,float16,float16,0,0.11403733491897583
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,32,8,128,1,fp8,fp8,0,0.09974400202433269
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,32,128,1,float16,float16,0,0.07415466507275899
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,1,128,1,float16,float16,0,0.06053866446018219
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,1,128,1,fp8,fp8,0,0.051829333106676735
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,32,128,1,float16,fp8,0,0.0701333334048589
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,2,128,1,float16,float16,0,0.061008001367251076
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,32,128,1,fp8,fp8,0,0.06386133531729381
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,1,128,1,float16,fp8,0,0.061754668752352394
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,2,128,1,float16,fp8,0,0.060746664802233376
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,2,128,1,fp8,fp8,0,0.05226133267084757
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,4,128,1,float16,float16,0,0.06182933350404104
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,4,128,1,float16,fp8,0,0.06201600035031637
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,4,128,1,fp8,fp8,0,0.05502399802207947
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,8,128,1,float16,float16,0,0.0632479985555013
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,8,128,1,float16,fp8,0,0.06381866832574208
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,32,128,1,float16,float16,0,0.04002666721741358
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,1,128,1,float16,float16,0,0.036176001032193504
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,1,128,1,float16,fp8,0,0.03619733452796936
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,32,128,1,fp8,fp8,0,0.03802666564782461
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,32,128,1,float16,fp8,0,0.03950933367013931
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,1,128,1,fp8,fp8,0,0.03205333401759466
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,32,8,128,1,fp8,fp8,0,0.05835733314355215
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,2,128,1,float16,float16,0,0.03620799879233042
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,2,128,1,float16,fp8,0,0.036015999813874565
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,2,128,1,fp8,fp8,0,0.032218667368094124
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,4,128,1,float16,float16,0,0.03623466690381368
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,4,128,1,float16,fp8,0,0.036789332826932274
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,4,128,1,fp8,fp8,0,0.03289599965016047
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,8,128,1,float16,float16,0,0.03674133370320002
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,8,128,1,float16,fp8,0,0.03737599899371465
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,32,8,128,1,fp8,fp8,0,0.033626665671666466
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,32,128,1,float16,fp8,0,0.026447998980681103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,32,128,1,float16,float16,0,0.02638400097688039
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,32,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,1,128,1,float16,float16,0,0.025114665428797405
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,1,128,1,float16,fp8,0,0.025199999411900837
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,1,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,2,128,1,float16,float16,0,0.025349333882331848
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,2,128,1,float16,fp8,0,0.025242666403452556
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,2,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,4,128,1,float16,float16,0,0.025349333882331848
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,4,128,1,float16,fp8,0,0.025759999950726826
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,4,128,1,fp8,fp8,0,0.02384000023206075
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,8,128,1,float16,fp8,0,0.02603200078010559
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,8,128,1,float16,float16,0,0.02554133286078771
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,32,8,128,1,fp8,fp8,0,0.024442667762438457
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,1,128,1,float16,float16,0,1.248464028040568
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,1,128,1,float16,fp8,0,1.2521920204162598
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,1,128,1,fp8,fp8,0,1.0880906581878662
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,2,128,1,float16,float16,0,1.2712213198343914
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,2,128,1,fp8,fp8,0,1.1401493549346924
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,2,128,1,float16,fp8,0,1.2708106835683186
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,4,128,1,float16,float16,0,1.291215976079305
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,4,128,1,float16,fp8,0,1.3034666379292805
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,1,128,1,float16,float16,0,0.6353866656621298
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,4,128,1,fp8,fp8,0,1.1799466609954834
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,8,128,1,float16,float16,0,1.339903990427653
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,32,128,1,float16,float16,0,0.8203252951304117
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,8,128,1,float16,fp8,0,1.3427039782206218
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,32,8,128,1,fp8,fp8,0,1.262453317642212
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,1,128,1,float16,fp8,0,0.6312373479207357
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,32,128,1,float16,fp8,0,0.7867200374603271
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,32,128,1,fp8,fp8,0,0.7068320115407308
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,1,128,1,fp8,fp8,0,0.546618660291036
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,2,128,1,float16,float16,0,0.6437173287073771
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,2,128,1,float16,fp8,0,0.6434400081634521
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,2,128,1,fp8,fp8,0,0.5736000140508016
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,4,128,1,float16,float16,0,0.6498506863911947
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,4,128,1,float16,fp8,0,0.6575573285420736
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,4,128,1,fp8,fp8,0,0.5957653522491455
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,8,128,1,float16,float16,0,0.663920005162557
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,1,128,1,float16,fp8,0,0.32546667257944745
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,1,128,1,float16,float16,0,0.32544533411661786
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,8,128,1,float16,fp8,0,0.6701227029164633
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,1,128,1,fp8,fp8,0,0.27986133098602295
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,32,8,128,1,fp8,fp8,0,0.6339466571807861
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,32,128,1,float16,float16,0,0.4143093427022298
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,32,128,1,float16,fp8,0,0.39660799503326416
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,32,128,1,fp8,fp8,0,0.3598560094833374
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,2,128,1,float16,float16,0,0.33034666379292804
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,2,128,1,float16,fp8,0,0.33130133152008057
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,2,128,1,fp8,fp8,0,0.2940533359845479
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,4,128,1,float16,float16,0,0.3318026661872864
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,4,128,1,float16,fp8,0,0.3372053305308024
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,4,128,1,fp8,fp8,0,0.30314133564631146
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,8,128,1,float16,float16,0,0.34036799271901447
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,1,128,1,float16,float16,0,0.1725813349088033
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,1,128,1,float16,fp8,0,0.17244267463684082
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,8,128,1,fp8,fp8,0,0.3248213330904643
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,32,8,128,1,float16,fp8,0,0.343664010365804
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,32,128,1,float16,float16,0,0.21553067366282144
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,32,128,1,float16,fp8,0,0.2076586683591207
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,1,128,1,fp8,fp8,0,0.148799995581309
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,32,128,1,fp8,fp8,0,0.18721065918604532
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,2,128,1,float16,float16,0,0.17377599080403647
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,2,128,1,float16,fp8,0,0.1736853321393331
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,2,128,1,fp8,fp8,0,0.1569813291231791
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,4,128,1,float16,fp8,0,0.1760586698849996
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,4,128,1,float16,float16,0,0.17520000537236533
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,8,128,1,float16,float16,0,0.18070934216181436
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,4,128,1,fp8,fp8,0,0.15753600001335144
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,8,128,1,float16,fp8,0,0.18036266167958578
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,32,8,128,1,fp8,fp8,0,0.1682186722755432
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,1,128,1,float16,float16,0,0.09469866752624512
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,1,128,1,float16,fp8,0,0.09361599882443745
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,32,128,1,float16,float16,0,0.11853333314259847
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,1,128,1,fp8,fp8,0,0.08260266482830048
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,32,128,1,float16,fp8,0,0.11447999874750774
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,32,128,1,fp8,fp8,0,0.10488532980283101
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,2,128,1,float16,float16,0,0.09403199950853984
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,2,128,1,float16,fp8,0,0.09564800063769023
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,4,128,1,float16,float16,0,0.09525866309801738
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,2,128,1,fp8,fp8,0,0.08550933003425598
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,4,128,1,float16,fp8,0,0.09570133686065674
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,4,128,1,fp8,fp8,0,0.08649067083994548
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,8,128,1,fp8,fp8,0,0.09076799949010213
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,8,128,1,float16,float16,0,0.09877866506576538
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,32,128,1,float16,float16,0,0.0706879993279775
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,32,8,128,1,float16,fp8,0,0.09891200065612793
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,1,128,1,float16,float16,0,0.0537066658337911
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,32,128,1,float16,fp8,0,0.06798933446407318
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,1,128,1,float16,fp8,0,0.054511999090512596
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,32,128,1,fp8,fp8,0,0.05975466469923655
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,1,128,1,fp8,fp8,0,0.04689066608746847
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,2,128,1,float16,float16,0,0.05402666827042898
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,2,128,1,float16,fp8,0,0.05484800040721893
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,4,128,1,float16,fp8,0,0.055173332492510475
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,4,128,1,float16,float16,0,0.05489600201447805
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,4,128,1,fp8,fp8,0,0.04922133187452952
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,2,128,1,fp8,fp8,0,0.04756799836953481
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,8,128,1,float16,float16,0,0.057487999399503074
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,8,128,1,float16,fp8,0,0.057087997595469155
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,32,128,1,float16,float16,0,0.03705599904060364
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,32,8,128,1,fp8,fp8,0,0.052442664901415505
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,32,128,1,float16,fp8,0,0.03674133370320002
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,32,128,1,fp8,fp8,0,0.035605333745479584
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,2,128,1,float16,float16,0,0.033530667424201965
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,1,128,1,fp8,fp8,0,0.029781334102153778
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,1,128,1,float16,fp8,0,0.033530667424201965
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,2,128,1,float16,fp8,0,0.03333866596221924
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,2,128,1,fp8,fp8,0,0.02979733298222224
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,4,128,1,float16,float16,0,0.033359999457995095
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,4,128,1,float16,fp8,0,0.0335359995563825
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,4,128,1,fp8,fp8,0,0.03018666555484136
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,8,128,1,float16,float16,0,0.034143999218940735
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,8,128,1,float16,fp8,0,0.03430933256944021
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,32,128,1,float16,float16,0,0.023930666347344715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,32,128,1,float16,fp8,0,0.02402133246262868
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,1,128,1,float16,float16,0,0.022255999346574146
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,1,128,1,fp8,fp8,0,0.02093333254257838
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,1,128,1,float16,fp8,0,0.022682666778564453
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,32,8,128,1,fp8,fp8,0,0.03152533372243246
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,32,128,1,fp8,fp8,0,0.022826666633288067
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,2,128,1,float16,float16,0,0.022442666192849476
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,2,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,2,128,1,fp8,fp8,0,0.021045332153638203
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,4,128,1,float16,float16,0,0.02274666726589203
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,4,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,4,128,1,fp8,fp8,0,0.021946666141351063
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,8,128,1,float16,fp8,0,0.023370665808518726
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,8,128,1,fp8,fp8,0,0.022554665803909302
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,32,8,128,1,float16,float16,0,0.02288000037272771
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,32,128,1,float16,float16,0,0.020314666132132213
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,32,128,1,float16,fp8,0,0.02027733375628789
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,32,128,1,fp8,fp8,0,0.01913600042462349
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,1,128,1,float16,float16,0,0.019082666685183842
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,1,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,1,128,1,fp8,fp8,0,0.01826133330663045
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,2,128,1,float16,float16,0,0.019093333433071773
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,2,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,2,128,1,fp8,fp8,0,0.01841066653529803
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,4,128,1,float16,float16,0,0.01942933350801468
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,4,128,1,float16,fp8,0,0.019653332730134327
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,4,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,8,128,1,float16,float16,0,0.01953599974513054
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,8,128,1,float16,fp8,0,0.019509332875410717
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,32,8,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,1,128,1,float16,fp8,0,0.580512007077535
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,2,128,1,float16,float16,0,0.5921173493067423
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,2,128,1,float16,fp8,0,0.5969599882761637
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,1,128,1,float16,float16,0,0.5844693183898926
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,2,128,1,fp8,fp8,0,0.5798773368199667
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,1,128,1,fp8,fp8,0,0.5471466779708862
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,4,128,1,float16,fp8,0,0.6055413484573364
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,4,128,1,float16,float16,0,0.5996266603469849
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,1,128,1,float16,float16,0,0.3004586696624756
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,4,128,1,fp8,fp8,0,0.6031839847564697
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,8,128,1,float16,float16,0,0.6218026479085287
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,32,128,1,float16,float16,0,0.4004160165786743
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,32,128,1,float16,fp8,0,0.38577600320180255
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,8,128,1,float16,fp8,0,0.6248159805933634
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,1,128,1,float16,fp8,0,0.30126933256785077
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,32,128,1,fp8,fp8,0,0.35865600903828937
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,32,8,128,1,fp8,fp8,0,0.6352906624476115
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,1,128,1,fp8,fp8,0,0.2791893283526103
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,2,128,1,float16,float16,0,0.3053599993387858
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,2,128,1,float16,fp8,0,0.3079306681950887
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,2,128,1,fp8,fp8,0,0.2965173323949178
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,4,128,1,float16,float16,0,0.3081120053927104
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,4,128,1,float16,fp8,0,0.311845342318217
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,4,128,1,fp8,fp8,0,0.30189333359400433
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,8,128,1,float16,fp8,0,0.3221013347307841
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,8,128,1,float16,float16,0,0.32203733921051025
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,1,128,1,float16,float16,0,0.16239999731381735
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,1,128,1,float16,fp8,0,0.16263999541600546
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,32,8,128,1,fp8,fp8,0,0.3238933285077413
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,32,128,1,float16,float16,0,0.21582933266957602
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,1,128,1,fp8,fp8,0,0.14864533146222433
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,32,128,1,float16,fp8,0,0.20848000049591064
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,2,128,1,float16,float16,0,0.1638826628526052
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,32,128,1,fp8,fp8,0,0.18655999501546225
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,2,128,1,float16,fp8,0,0.16390400131543478
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,2,128,1,fp8,fp8,0,0.1537813345591227
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,4,128,1,float16,float16,0,0.165610671043396
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,4,128,1,float16,fp8,0,0.16562133034070334
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,4,128,1,fp8,fp8,0,0.15772266189257303
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,8,128,1,float16,float16,0,0.17067732413609824
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,8,128,1,fp8,fp8,0,0.16818133989969888
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,32,8,128,1,float16,fp8,0,0.17121599117914835
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,32,128,1,float16,float16,0,0.11958932876586914
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,1,128,1,float16,float16,0,0.08939199646313985
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,1,128,1,float16,fp8,0,0.09020266930262248
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,32,128,1,float16,fp8,0,0.11574932932853699
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,32,128,1,fp8,fp8,0,0.10283733407656352
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,1,128,1,fp8,fp8,0,0.08301333089669545
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,2,128,1,float16,float16,0,0.08916800220807393
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,2,128,1,float16,fp8,0,0.0906773308912913
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,2,128,1,fp8,fp8,0,0.08337066570917766
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,4,128,1,float16,float16,0,0.09111467003822327
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,4,128,1,float16,fp8,0,0.0913706620534261
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,4,128,1,fp8,fp8,0,0.08596266309420268
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,8,128,1,float16,fp8,0,0.09390399853388469
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,8,128,1,float16,float16,0,0.09392533699671428
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,32,8,128,1,fp8,fp8,0,0.09038399656613667
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,32,128,1,float16,fp8,0,0.06529066463311513
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,1,128,1,float16,float16,0,0.05075199902057648
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,32,128,1,float16,float16,0,0.06924266616503398
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,32,128,1,fp8,fp8,0,0.05975999931494395
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,1,128,1,fp8,fp8,0,0.04651199777921041
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,2,128,1,float16,float16,0,0.050901333491007485
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,2,128,1,float16,fp8,0,0.050810664892196655
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,2,128,1,fp8,fp8,0,0.04742933313051859
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,4,128,1,float16,float16,0,0.05150400102138519
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,1,128,1,float16,fp8,0,0.05217599868774414
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,4,128,1,fp8,fp8,0,0.04849066833655039
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,4,128,1,float16,fp8,0,0.05314133564631144
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,8,128,1,float16,float16,0,0.05332799752553304
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,8,128,1,float16,fp8,0,0.054144000013669334
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,32,8,128,1,fp8,fp8,0,0.0516533354918162
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,32,128,1,float16,fp8,0,0.03624533365170161
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,32,128,1,float16,float16,0,0.03682666768630346
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,1,128,1,float16,float16,0,0.03201066702604294
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,32,128,1,fp8,fp8,0,0.03566933423280716
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,1,128,1,float16,fp8,0,0.032357332607110344
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,1,128,1,fp8,fp8,0,0.02959466725587845
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,2,128,1,fp8,fp8,0,0.029824001093705494
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,2,128,1,float16,float16,0,0.03221333275238673
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,2,128,1,float16,fp8,0,0.03242133309443792
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,4,128,1,float16,float16,0,0.032325332363446556
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,4,128,1,float16,fp8,0,0.0322826678554217
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,8,128,1,float16,fp8,0,0.0330826664964358
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,8,128,1,fp8,fp8,0,0.03128000100453695
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,8,128,1,float16,float16,0,0.03315199911594391
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,32,4,128,1,fp8,fp8,0,0.03033066789309184
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,32,128,1,fp8,fp8,0,0.02272533377011617
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,32,128,1,float16,fp8,0,0.0232640008131663
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,32,128,1,float16,float16,0,0.023237332701683044
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,1,128,1,float16,float16,0,0.021546666820844013
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,1,128,1,fp8,fp8,0,0.020687999824682873
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,1,128,1,float16,fp8,0,0.021941334009170532
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,2,128,1,float16,float16,0,0.021701333423455555
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,2,128,1,fp8,fp8,0,0.02075733368595441
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,4,128,1,float16,float16,0,0.022181332111358643
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,4,128,1,float16,fp8,0,0.02253866692384084
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,4,128,1,fp8,fp8,0,0.021701333423455555
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,2,128,1,float16,fp8,0,0.02184533327817917
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,8,128,1,float16,float16,0,0.022053333620230358
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,8,128,1,float16,fp8,0,0.02293333411216736
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,32,128,1,float16,float16,0,0.01850133389234543
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,32,128,1,float16,fp8,0,0.018624000251293182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,32,8,128,1,fp8,fp8,0,0.02250666668017705
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,32,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,1,128,1,float16,float16,0,0.017994667092959087
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,1,128,1,fp8,fp8,0,0.018197332819302876
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,2,128,1,float16,float16,0,0.017829333742459614
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,2,128,1,fp8,fp8,0,0.018474667022625606
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,2,128,1,float16,fp8,0,0.018288000176350277
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,1,128,1,float16,fp8,0,0.01823466643691063
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,4,128,1,fp8,fp8,0,0.018895999838908512
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,4,128,1,float16,fp8,0,0.01851733277241389
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,8,128,1,float16,float16,0,0.018432000031073887
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,4,128,1,float16,float16,0,0.018207999567190807
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,8,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,32,8,128,1,float16,fp8,0,0.018565333137909572
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,1,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,32,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,32,128,1,float16,float16,0,0.016783999900023144
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,1,128,1,float16,float16,0,0.016544000556071598
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,32,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,1,128,1,fp8,fp8,0,0.01801066721479098
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,2,128,1,float16,float16,0,0.016677333662907284
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,2,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,4,128,1,float16,float16,0,0.016943999876578648
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,2,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,4,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,4,128,1,fp8,fp8,0,0.01738133281469345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,8,128,1,fp8,fp8,0,0.017466666797796886
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,8,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,32,8,128,1,float16,float16,0,0.01692266638080279
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,1,128,1,float16,fp8,0,0.3661280075709025
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,1,128,1,float16,float16,0,0.3670773506164551
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,1,128,1,fp8,fp8,0,0.3826506535212199
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,2,128,1,float16,float16,0,0.37170668443044025
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,2,128,1,fp8,fp8,0,0.3924586772918701
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,4,128,1,float16,float16,0,0.3770293394724528
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,2,128,1,float16,fp8,0,0.37162665526072186
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,4,128,1,fp8,fp8,0,0.4058186610539754
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,4,128,1,float16,fp8,0,0.37669865290323895
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,8,128,1,float16,float16,0,0.3887573480606079
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,8,128,1,float16,fp8,0,0.3861973285675049
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,32,8,128,1,fp8,fp8,0,0.42238398392995197
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,1,128,1,float16,float16,0,0.1919040083885193
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,32,128,1,fp8,fp8,0,0.23651200532913208
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,32,128,1,float16,fp8,0,0.21758933862050375
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,32,128,1,float16,float16,0,0.22482667366663614
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,1,128,1,fp8,fp8,0,0.20121600230534872
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,1,128,1,float16,fp8,0,0.19236799081166586
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,2,128,1,float16,float16,0,0.1939679980278015
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,2,128,1,float16,fp8,0,0.19408533970514932
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,4,128,1,float16,float16,0,0.1959679921468099
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,4,128,1,fp8,fp8,0,0.2084746758143107
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,2,128,1,fp8,fp8,0,0.20710933208465576
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,8,128,1,float16,float16,0,0.2024959921836853
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,4,128,1,float16,fp8,0,0.19767999649047852
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,8,128,1,fp8,fp8,0,0.21724800268809
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,32,8,128,1,float16,fp8,0,0.20090667406717935
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,32,128,1,float16,float16,0,0.12153066198031108
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,1,128,1,float16,fp8,0,0.10427199800809224
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,1,128,1,float16,float16,0,0.10378666718800862
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,1,128,1,fp8,fp8,0,0.10938666264216106
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,32,128,1,float16,fp8,0,0.11777599652608235
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,32,128,1,fp8,fp8,0,0.12773332993189493
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,2,128,1,float16,float16,0,0.10417600472768147
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,2,128,1,fp8,fp8,0,0.11175466577212016
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,2,128,1,float16,fp8,0,0.10583466291427612
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,4,128,1,float16,float16,0,0.10575999816258748
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,4,128,1,float16,fp8,0,0.10612799723943074
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,4,128,1,fp8,fp8,0,0.11344533165295918
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,8,128,1,float16,float16,0,0.10847999652226765
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,8,128,1,float16,fp8,0,0.10837866862614949
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,32,8,128,1,fp8,fp8,0,0.11812800168991089
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,32,128,1,float16,fp8,0,0.06677866478761037
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,32,128,1,float16,float16,0,0.06855999926726024
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,1,128,1,float16,float16,0,0.05752533177534739
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,32,128,1,fp8,fp8,0,0.07264000177383423
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,1,128,1,fp8,fp8,0,0.06170133252938589
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,1,128,1,float16,fp8,0,0.05878399809201559
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,2,128,1,float16,fp8,0,0.05856533348560333
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,2,128,1,float16,float16,0,0.05827199916044871
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,2,128,1,fp8,fp8,0,0.062352001667022705
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,4,128,1,float16,float16,0,0.059343998630841575
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,4,128,1,float16,fp8,0,0.058970664938290916
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,8,128,1,float16,float16,0,0.060831998785336815
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,4,128,1,fp8,fp8,0,0.06297066807746887
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,8,128,1,float16,fp8,0,0.061066667238871254
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,32,8,128,1,fp8,fp8,0,0.06605866551399231
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,32,128,1,float16,fp8,0,0.036176001032193504
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,32,128,1,fp8,fp8,0,0.0424586683511734
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,1,128,1,float16,fp8,0,0.0349386657277743
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,1,128,1,fp8,fp8,0,0.03678400069475174
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,1,128,1,float16,float16,0,0.03477333237727483
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,2,128,1,float16,float16,0,0.03518400092919668
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,32,128,1,float16,float16,0,0.03678400069475174
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,2,128,1,float16,fp8,0,0.0351946676770846
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,2,128,1,fp8,fp8,0,0.036864000062147774
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,4,128,1,float16,float16,0,0.03510399907827377
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,4,128,1,float16,fp8,0,0.03514666606982549
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,4,128,1,fp8,fp8,0,0.037402667105197906
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,8,128,1,float16,float16,0,0.03540800015131632
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,8,128,1,float16,fp8,0,0.03579733272393545
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,32,128,1,float16,float16,0,0.02403733382622401
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,32,128,1,float16,fp8,0,0.024490666886170704
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,32,128,1,fp8,fp8,0,0.026133333643277485
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,1,128,1,float16,float16,0,0.023018665611743927
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,1,128,1,float16,fp8,0,0.02333866556485494
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,1,128,1,fp8,fp8,0,0.02422933280467987
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,32,8,128,1,fp8,fp8,0,0.03819733361403147
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,2,128,1,float16,float16,0,0.02332799881696701
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,2,128,1,float16,fp8,0,0.023445333043734234
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,4,128,1,float16,float16,0,0.023423999547958374
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,2,128,1,fp8,fp8,0,0.024383999407291412
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,4,128,1,float16,fp8,0,0.023898666103680927
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,4,128,1,fp8,fp8,0,0.025055999557177227
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,8,128,1,float16,float16,0,0.023621333142121632
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,8,128,1,fp8,fp8,0,0.025797332326571148
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,32,8,128,1,float16,fp8,0,0.023872000475724537
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,32,128,1,float16,fp8,0,0.018474667022625606
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,32,128,1,fp8,fp8,0,0.01953599974513054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,1,128,1,float16,float16,0,0.017279999951521557
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,1,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,1,128,1,fp8,fp8,0,0.01821333294113477
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,32,128,1,float16,float16,0,0.01836799954374631
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,2,128,1,float16,float16,0,0.017269333203633625
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,2,128,1,fp8,fp8,0,0.018357332795858383
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,2,128,1,float16,fp8,0,0.017562666287024815
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,4,128,1,float16,float16,0,0.01758933315674464
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,4,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,4,128,1,float16,fp8,0,0.017840000490347546
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,8,128,1,float16,float16,0,0.017551999539136887
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,8,128,1,float16,fp8,0,0.018005333840847015
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,32,8,128,1,fp8,fp8,0,0.019487999379634857
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,32,128,1,float16,float16,0,0.015861333658297855
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,1,128,1,float16,float16,0,0.01563199982047081
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,32,128,1,fp8,fp8,0,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,32,128,1,float16,fp8,0,0.016309333344300587
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,1,128,1,float16,fp8,0,0.016127999871969223
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,1,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,2,128,1,float16,float16,0,0.01563199982047081
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,2,128,1,float16,fp8,0,0.016202667107184727
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,2,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,4,128,1,float16,float16,0,0.015664000064134598
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,4,128,1,float16,fp8,0,0.01632000009218852
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,8,128,1,float16,float16,0,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,8,128,1,float16,fp8,0,0.016149333367745083
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,8,128,1,fp8,fp8,0,0.017375999440749485
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,32,4,128,1,fp8,fp8,0,0.017450666675964992
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,32,128,1,float16,float16,0,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,32,128,1,float16,fp8,0,0.015301333119471868
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,32,128,1,fp8,fp8,0,0.016458666572968166
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,1,128,1,float16,float16,0,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,1,128,1,float16,fp8,0,0.015813333292802174
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,2,128,1,float16,float16,0,0.01526933287580808
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,1,128,1,fp8,fp8,0,0.016330666840076447
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,2,128,1,float16,fp8,0,0.015925332903862
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,2,128,1,fp8,fp8,0,0.016336000214020412
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,4,128,1,float16,float16,0,0.015072000523408255
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,4,128,1,float16,fp8,0,0.015829333414634068
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,4,128,1,fp8,fp8,0,0.01648533344268799
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,8,128,1,float16,fp8,0,0.01581866666674614
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,8,128,1,float16,float16,0,0.015018666783968607
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,32,8,128,1,fp8,fp8,0,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,1,128,1,float16,float16,0,0.25900799036026
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,1,128,1,fp8,fp8,0,0.30802132685979206
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,2,128,1,float16,float16,0,0.2621973355611165
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,2,128,1,fp8,fp8,0,0.31219732761383057
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,4,128,1,float16,float16,0,0.26386133829752606
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,2,128,1,float16,fp8,0,0.2616159915924072
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,4,128,1,float16,fp8,0,0.2637066642443339
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,1,128,1,float16,fp8,0,0.25965332984924316
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,4,128,1,fp8,fp8,0,0.31497599681218463
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,8,128,1,fp8,fp8,0,0.32520532608032227
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,8,128,1,float16,fp8,0,0.26900267601013184
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,32,8,128,1,float16,float16,0,0.2691733241081238
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,32,128,1,float16,float16,0,0.15161599715550741
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,32,128,1,float16,fp8,0,0.14734933773676553
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,32,128,1,fp8,fp8,0,0.1800533334414164
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,1,128,1,float16,float16,0,0.13784000277519226
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,1,128,1,float16,fp8,0,0.13806399703025818
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,1,128,1,fp8,fp8,0,0.1643786629041036
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,2,128,1,float16,float16,0,0.13691733280817667
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,2,128,1,float16,fp8,0,0.1373546620210012
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,2,128,1,fp8,fp8,0,0.16609600186347961
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,4,128,1,float16,float16,0,0.13927466670672098
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,4,128,1,float16,fp8,0,0.13894933462142944
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,4,128,1,fp8,fp8,0,0.16582933068275452
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,8,128,1,float16,float16,0,0.14218133687973022
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,8,128,1,fp8,fp8,0,0.17109866937001547
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,1,128,1,float16,float16,0,0.07402666906515758
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,32,8,128,1,float16,fp8,0,0.14225600163141885
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,32,128,1,fp8,fp8,0,0.10030399759610494
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,32,128,1,float16,fp8,0,0.0821973333756129
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,32,128,1,float16,float16,0,0.08418132861455281
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,2,128,1,fp8,fp8,0,0.08917867143948872
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,2,128,1,float16,float16,0,0.07469333211580913
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,1,128,1,float16,fp8,0,0.0743146687746048
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,2,128,1,float16,fp8,0,0.07441066702206929
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,4,128,1,float16,float16,0,0.0753600001335144
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,1,128,1,fp8,fp8,0,0.0885759989420573
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,4,128,1,float16,fp8,0,0.07600533465544383
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,4,128,1,fp8,fp8,0,0.09019200007120769
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,8,128,1,float16,fp8,0,0.0772213339805603
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,1,128,1,float16,fp8,0,0.04295999805132548
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,8,128,1,float16,float16,0,0.07603733241558075
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,32,128,1,float16,fp8,0,0.04400533437728882
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,1,128,1,float16,float16,0,0.04267199834187826
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,32,128,1,float16,float16,0,0.04358933369318644
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,32,8,128,1,fp8,fp8,0,0.09435199697812398
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,32,128,1,fp8,fp8,0,0.05654400090376536
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,2,128,1,float16,float16,0,0.04282666742801666
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,1,128,1,fp8,fp8,0,0.05050666630268097
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,2,128,1,float16,fp8,0,0.0425546665986379
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,4,128,1,float16,float16,0,0.042949333786964417
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,4,128,1,float16,fp8,0,0.04318933188915253
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,2,128,1,fp8,fp8,0,0.050767997900644936
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,8,128,1,float16,fp8,0,0.04369066655635834
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,8,128,1,fp8,fp8,0,0.05261866748332977
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,32,128,1,float16,float16,0,0.027615999182065327
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,4,128,1,fp8,fp8,0,0.051072001457214355
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,32,8,128,1,float16,float16,0,0.04391466577847799
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,32,128,1,fp8,fp8,0,0.03323733309904734
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,1,128,1,float16,float16,0,0.027045334378878277
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,32,128,1,float16,fp8,0,0.02739733209212621
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,1,128,1,float16,fp8,0,0.026767998933792114
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,2,128,1,float16,float16,0,0.026858667532602947
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,1,128,1,fp8,fp8,0,0.031632001201311745
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,4,128,1,float16,float16,0,0.02743999908367793
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,8,128,1,float16,fp8,0,0.02754133443037669
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,4,128,1,fp8,fp8,0,0.032272001107533775
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,8,128,1,float16,float16,0,0.027210667729377747
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,4,128,1,float16,fp8,0,0.027461332579453785
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,8,128,1,fp8,fp8,0,0.03286933402220408
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,2,128,1,float16,fp8,0,0.026917333404223125
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,32,2,128,1,fp8,fp8,0,0.031370667119820915
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,32,128,1,float16,fp8,0,0.020101333657900494
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,32,128,1,float16,float16,0,0.020021333048741024
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,1,128,1,fp8,fp8,0,0.021712000171343487
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,1,128,1,float16,float16,0,0.018458666900793713
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,32,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,2,128,1,fp8,fp8,0,0.021759999295075733
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,1,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,4,128,1,float16,float16,0,0.018895999838908512
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,2,128,1,float16,float16,0,0.018800000349680584
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,4,128,1,float16,fp8,0,0.019141333798567455
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,4,128,1,fp8,fp8,0,0.022592000663280487
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,2,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,8,128,1,float16,fp8,0,0.019776000330845516
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,8,128,1,float16,float16,0,0.019002666076024372
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,32,128,1,float16,float16,0,0.015861333658297855
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,32,8,128,1,fp8,fp8,0,0.022495999932289124
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,32,128,1,fp8,fp8,0,0.01791999985774358
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,1,128,1,float16,float16,0,0.014725333700577417
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,1,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,1,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,32,128,1,float16,fp8,0,0.016197333733240765
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,2,128,1,float16,float16,0,0.014842666685581207
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,2,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,4,128,1,float16,fp8,0,0.015450666348139444
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,2,128,1,fp8,fp8,0,0.017690667261679966
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,4,128,1,float16,float16,0,0.015360000232855478
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,8,128,1,float16,float16,0,0.015568000574906668
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,4,128,1,fp8,fp8,0,0.01806933308641116
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,32,128,1,float16,float16,0,0.015018666783968607
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,8,128,1,fp8,fp8,0,0.017946666727463405
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,32,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,32,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,1,128,1,float16,float16,0,0.01443733274936676
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,32,8,128,1,float16,fp8,0,0.01587733378012975
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,1,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,1,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,2,128,1,fp8,fp8,0,0.016645333419243496
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,2,128,1,float16,float16,0,0.014373333503802618
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,2,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,4,128,1,float16,float16,0,0.014458666245142618
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,4,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,8,128,1,float16,float16,0,0.014554666976133982
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,8,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,32,8,128,1,fp8,fp8,0,0.016677333662907284
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,32,128,1,float16,float16,0,0.014218666901191076
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,32,128,1,float16,fp8,0,0.014373333503802618
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,32,128,1,fp8,fp8,0,0.016085332880417507
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,1,128,1,float16,float16,0,0.014069333672523499
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,1,128,1,float16,fp8,0,0.014725333700577417
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,2,128,1,float16,fp8,0,0.014762666076421738
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,2,128,1,float16,float16,0,0.01403733342885971
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,2,128,1,fp8,fp8,0,0.016208000481128693
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,1,128,1,fp8,fp8,0,0.015930666277805965
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,4,128,1,float16,float16,0,0.013904000322024027
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,4,128,1,float16,fp8,0,0.01482133318980535
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,4,128,1,fp8,fp8,0,0.016282666474580765
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,8,128,1,float16,float16,0,0.014165333161751429
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,8,128,1,float16,fp8,0,0.014778666198253632
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,32,8,128,1,fp8,fp8,0,0.016208000481128693
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,1,128,1,float16,float16,0,0.22234133879343668
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,1,128,1,fp8,fp8,0,0.2653653422991435
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,2,128,1,float16,float16,0,0.22238399585088095
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,1,128,1,float16,fp8,0,0.22382400433222452
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,2,128,1,float16,fp8,0,0.22272000710169473
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,2,128,1,fp8,fp8,0,0.2679413358370463
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,4,128,1,float16,float16,0,0.2228320042292277
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,4,128,1,float16,fp8,0,0.22287466128667197
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,4,128,1,fp8,fp8,0,0.26902933915456134
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,8,128,1,float16,float16,0,0.2254026730855306
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,32,128,1,float16,float16,0,0.1172320048014323
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,1,128,1,float16,float16,0,0.11668266852696736
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,8,128,1,fp8,fp8,0,0.27296000719070435
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,32,128,1,float16,fp8,0,0.11477866768836975
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,32,128,1,fp8,fp8,0,0.1502079963684082
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,32,8,128,1,float16,fp8,0,0.22570133209228516
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,1,128,1,float16,fp8,0,0.11555733283360799
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,1,128,1,fp8,fp8,0,0.13960533340771994
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,2,128,1,float16,fp8,0,0.1165173351764679
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,2,128,1,float16,float16,0,0.11691733201344807
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,4,128,1,float16,fp8,0,0.1176533301671346
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,4,128,1,float16,float16,0,0.11804800232251485
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,2,128,1,fp8,fp8,0,0.13991467157999674
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,8,128,1,float16,float16,0,0.11825600266456604
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,8,128,1,float16,fp8,0,0.11937600374221802
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,4,128,1,fp8,fp8,0,0.14217600226402283
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,32,8,128,1,fp8,fp8,0,0.14573333660761514
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,32,128,1,float16,float16,0,0.060975998640060425
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,1,128,1,float16,float16,0,0.06547733147939046
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,32,128,1,fp8,fp8,0,0.08245866497357686
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,32,128,1,float16,fp8,0,0.060906668504079185
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,1,128,1,float16,fp8,0,0.0652106652657191
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,1,128,1,fp8,fp8,0,0.0782239983479182
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,2,128,1,float16,fp8,0,0.06524799764156342
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,2,128,1,float16,float16,0,0.06514133512973785
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,2,128,1,fp8,fp8,0,0.07852266728878021
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,4,128,1,float16,fp8,0,0.06530133386452992
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,4,128,1,float16,float16,0,0.06554133196671803
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,8,128,1,float16,float16,0,0.06549333532651265
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,4,128,1,fp8,fp8,0,0.07896533111731212
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,8,128,1,fp8,fp8,0,0.0801386684179306
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,32,8,128,1,float16,fp8,0,0.06606400012969971
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,32,128,1,float16,fp8,0,0.03597866743803024
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,1,128,1,float16,fp8,0,0.03854399919509888
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,32,128,1,fp8,fp8,0,0.046282668908437095
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,1,128,1,float16,float16,0,0.03866666555404663
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,1,128,1,fp8,fp8,0,0.04525866607824961
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,32,128,1,float16,float16,0,0.036159999668598175
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,2,128,1,float16,float16,0,0.038378665844599404
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,2,128,1,fp8,fp8,0,0.04584000011285146
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,2,128,1,float16,fp8,0,0.03876800090074539
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,4,128,1,float16,float16,0,0.03895466774702072
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,4,128,1,float16,fp8,0,0.03892799963553747
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,8,128,1,fp8,fp8,0,0.04674666623274485
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,8,128,1,float16,float16,0,0.039066667358080544
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,4,128,1,fp8,fp8,0,0.04636266827583313
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,32,128,1,float16,float16,0,0.024453334510326385
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,32,8,128,1,float16,fp8,0,0.03930133332808813
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,32,128,1,float16,fp8,0,0.02479466547568639
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,1,128,1,float16,float16,0,0.024442667762438457
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,32,128,1,fp8,fp8,0,0.029461334149042766
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,1,128,1,float16,fp8,0,0.024495999018351238
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,2,128,1,float16,float16,0,0.02454400062561035
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,2,128,1,float16,fp8,0,0.024826665719350178
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,4,128,1,float16,fp8,0,0.025120000044504803
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,4,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,4,128,1,float16,float16,0,0.024586667617162068
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,8,128,1,float16,float16,0,0.024842667082945507
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,2,128,1,fp8,fp8,0,0.029120000700155895
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,32,128,1,float16,float16,0,0.018533332894245785
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,8,128,1,fp8,fp8,0,0.029509333272775013
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,32,128,1,fp8,fp8,0,0.021541332205136616
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,32,8,128,1,float16,fp8,0,0.024853333830833435
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,1,128,1,float16,float16,0,0.017903999735911686
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,2,128,1,float16,float16,0,0.017770666629076004
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,32,128,1,float16,fp8,0,0.01878400022784869
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,2,128,1,float16,fp8,0,0.018191999445358913
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,1,128,1,float16,fp8,0,0.0179626668492953
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,2,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,1,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,4,128,1,fp8,fp8,0,0.021418665846188862
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,8,128,1,float16,fp8,0,0.01848000039656957
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,4,128,1,float16,float16,0,0.01794133335351944
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,8,128,1,fp8,fp8,0,0.021829334398110706
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,32,128,1,float16,float16,0,0.015178666760524115
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,8,128,1,float16,float16,0,0.01794133335351944
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,1,128,1,float16,float16,0,0.014912000546852747
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,32,4,128,1,float16,fp8,0,0.017935999979575474
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,32,128,1,fp8,fp8,0,0.017488000293572743
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,32,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,1,128,1,float16,fp8,0,0.015461333096027374
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,2,128,1,float16,float16,0,0.014938666174809137
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,1,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,2,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,4,128,1,float16,fp8,0,0.015504000087579092
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,8,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,2,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,8,128,1,float16,float16,0,0.015029333531856537
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,32,128,1,float16,fp8,0,0.014741333822409311
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,32,128,1,float16,float16,0,0.014549333602190018
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,4,128,1,float16,float16,0,0.014943999548753103
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,4,128,1,fp8,fp8,0,0.017616000026464462
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,32,8,128,1,fp8,fp8,0,0.017551999539136887
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,32,128,1,fp8,fp8,0,0.016528000434239704
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,2,128,1,float16,float16,0,0.014090667168299357
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,1,128,1,fp8,fp8,0,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,1,128,1,float16,fp8,0,0.01461333284775416
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,1,128,1,float16,float16,0,0.01413333291808764
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,4,128,1,float16,fp8,0,0.014698666830857595
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,4,128,1,float16,float16,0,0.014261333892742792
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,8,128,1,float16,float16,0,0.014463999619086584
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,2,128,1,fp8,fp8,0,0.016309333344300587
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,2,128,1,float16,fp8,0,0.014671999961137772
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,8,128,1,float16,fp8,0,0.014688000082969666
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,32,128,1,float16,float16,0,0.01403733342885971
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,32,128,1,fp8,fp8,0,0.016021333634853363
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,32,128,1,float16,fp8,0,0.014384000251690546
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,1,128,1,float16,float16,0,0.013962666193644205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,32,8,128,1,fp8,fp8,0,0.016762666404247284
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,2,128,1,fp8,fp8,0,0.016293333222468693
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,2,128,1,float16,fp8,0,0.014629332969586054
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,2,128,1,float16,float16,0,0.013823999712864557
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,1,128,1,float16,fp8,0,0.014592000593741735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,1,128,1,fp8,fp8,0,0.01605333387851715
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,4,128,1,float16,float16,0,0.013936000565687815
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,4,128,1,float16,fp8,0,0.014538666854302088
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,8,128,1,float16,fp8,0,0.01469333345691363
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,8,128,1,float16,float16,0,0.013999999811251959
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,8,128,1,fp8,fp8,0,0.016149333367745083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,32,4,128,1,fp8,fp8,0,0.016085332880417507
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,1,128,1,float16,float16,0,24.517995198567707
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,2,128,1,fp8,fp8,0,16.29749298095703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,1,128,1,fp8,fp8,0,16.06650670369466
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,2,128,1,float16,float16,0,24.615572611490887
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,1,128,1,float16,fp8,0,24.82945505777995
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,2,128,1,float16,fp8,0,24.336016337076824
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,4,128,1,float16,float16,0,24.8110834757487
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,4,128,1,float16,fp8,0,24.741989135742188
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,1,128,1,float16,float16,0,12.500624338785807
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,4,128,1,fp8,fp8,0,16.049930572509766
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,24,128,1,float16,fp8,0,12.03277333577474
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,8,128,1,float16,fp8,0,24.85534922281901
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,8,128,1,float16,float16,0,24.985445658365887
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,24,128,1,float16,float16,0,12.500155131022135
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,24,8,128,1,fp8,fp8,0,16.186517079671223
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,24,128,1,fp8,fp8,0,8.11236826578776
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,1,128,1,fp8,fp8,0,7.986661275227864
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,1,128,1,float16,fp8,0,12.489200592041016
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,2,128,1,float16,float16,0,12.340260823567709
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,2,128,1,float16,fp8,0,12.420052846272787
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,2,128,1,fp8,fp8,0,8.06817626953125
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,4,128,1,float16,float16,0,12.6658935546875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,4,128,1,fp8,fp8,0,8.202143987019857
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,4,128,1,float16,fp8,0,12.428581237792969
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,8,128,1,float16,float16,0,12.44979731241862
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,8,128,1,float16,fp8,0,12.51589330037435
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,1,128,1,float16,fp8,0,6.199525197347005
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,1,128,1,float16,float16,0,6.297578811645508
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,24,8,128,1,fp8,fp8,0,8.126842498779297
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,24,128,1,float16,float16,0,6.051055908203125
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,24,128,1,float16,fp8,0,6.189247767130534
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,1,128,1,fp8,fp8,0,4.023989359537761
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,2,128,1,float16,float16,0,6.125738779703776
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,24,128,1,fp8,fp8,0,3.971525192260742
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,2,128,1,float16,fp8,0,6.244266510009766
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,2,128,1,fp8,fp8,0,3.9437761306762695
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,4,128,1,float16,float16,0,6.205738703409831
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,4,128,1,float16,fp8,0,6.1200002034505205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,4,128,1,fp8,fp8,0,4.067488034566243
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,8,128,1,float16,float16,0,6.0833384195963545
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,24,128,1,float16,float16,0,3.0365705490112305
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,8,128,1,float16,fp8,0,6.229738871256511
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,1,128,1,float16,float16,0,3.141749382019043
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,1,128,1,float16,fp8,0,3.075039863586426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,24,128,1,float16,fp8,0,3.023493448893229
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,24,8,128,1,fp8,fp8,0,4.114725430806478
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,1,128,1,fp8,fp8,0,2.0855093002319336
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,24,128,1,fp8,fp8,0,2.048410733540853
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,2,128,1,float16,fp8,0,3.0784692764282227
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,2,128,1,float16,float16,0,3.0850613911946616
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,2,128,1,fp8,fp8,0,2.08681058883667
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,4,128,1,float16,float16,0,3.1247307459513345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,4,128,1,float16,fp8,0,3.1593974431355796
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,4,128,1,fp8,fp8,0,2.097658634185791
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,8,128,1,float16,float16,0,3.1093066533406577
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,8,128,1,float16,fp8,0,3.107930819193522
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,24,8,128,1,fp8,fp8,0,2.1103413899739585
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,1,128,1,fp8,fp8,0,9.506944020589193
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,1,128,1,float16,float16,0,14.341962178548178
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,1,128,1,float16,fp8,0,14.247968037923178
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,2,128,1,float16,float16,0,14.405354817708334
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,2,128,1,float16,fp8,0,14.3560422261556
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,2,128,1,fp8,fp8,0,9.357840220133463
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,4,128,1,float16,float16,0,14.1606076558431
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,4,128,1,float16,fp8,0,14.357781728108725
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,1,128,1,float16,float16,0,7.240682601928711
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,4,128,1,fp8,fp8,0,9.51261838277181
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,8,128,1,float16,float16,0,14.433834075927734
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,8,128,1,float16,fp8,0,14.379093170166016
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,24,128,1,float16,float16,0,7.107114791870117
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,24,128,1,float16,fp8,0,6.95303471883138
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,1,128,1,float16,fp8,0,7.127370834350586
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,24,8,128,1,fp8,fp8,0,9.459941228230795
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,1,128,1,fp8,fp8,0,4.618501345316569
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,24,128,1,fp8,fp8,0,4.493722597757976
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,2,128,1,float16,float16,0,7.141583760579427
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,2,128,1,float16,fp8,0,7.07473627726237
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,2,128,1,fp8,fp8,0,4.749973297119141
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,4,128,1,fp8,fp8,0,4.5547787348429365
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,4,128,1,float16,fp8,0,7.075109481811523
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,4,128,1,float16,float16,0,7.079231897989909
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,8,128,1,float16,fp8,0,7.074170430501302
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,8,128,1,float16,float16,0,7.322624206542969
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,1,128,1,float16,float16,0,3.4713172912597656
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,1,128,1,float16,fp8,0,3.4775733947753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,1,128,1,fp8,fp8,0,2.332159996032715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,24,128,1,float16,float16,0,3.5140854517618814
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,24,8,128,1,fp8,fp8,0,4.670570691426595
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,2,128,1,float16,float16,0,3.5637388229370117
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,2,128,1,float16,fp8,0,3.4450505574544272
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,24,128,1,fp8,fp8,0,2.3673013051350913
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,2,128,1,fp8,fp8,0,2.341541290283203
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,24,128,1,float16,fp8,0,3.3881759643554688
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,4,128,1,float16,float16,0,3.503472010294596
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,4,128,1,fp8,fp8,0,2.351802666982015
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,4,128,1,float16,fp8,0,3.560234705607096
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,8,128,1,float16,float16,0,3.5450827280680337
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,24,128,1,float16,float16,0,1.8023145993550618
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,8,128,1,float16,fp8,0,3.471680005391439
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,1,128,1,float16,float16,0,1.8431040445963542
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,1,128,1,float16,fp8,0,1.8192532857259114
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,24,8,128,1,fp8,fp8,0,2.381333351135254
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,24,128,1,float16,fp8,0,1.7972052892049153
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,1,128,1,fp8,fp8,0,1.2786506811777751
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,24,128,1,fp8,fp8,0,1.259930690129598
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,2,128,1,float16,fp8,0,1.8357226053873699
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,2,128,1,fp8,fp8,0,1.2814613183339436
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,4,128,1,float16,float16,0,1.8425386746724446
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,2,128,1,float16,float16,0,1.8364906311035156
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,4,128,1,float16,fp8,0,1.8449172973632812
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,4,128,1,fp8,fp8,0,1.278773307800293
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,8,128,1,float16,float16,0,1.8619573911031086
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,8,128,1,float16,fp8,0,1.8207839330037434
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,24,8,128,1,fp8,fp8,0,1.289562702178955
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,1,128,1,float16,float16,0,10.254554748535156
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,1,128,1,float16,fp8,0,10.028021494547525
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,1,128,1,fp8,fp8,0,6.6226450602213545
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,2,128,1,fp8,fp8,0,6.6809336344401045
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,2,128,1,float16,fp8,0,10.321269353230795
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,2,128,1,float16,float16,0,10.36245346069336
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,4,128,1,float16,float16,0,10.09933344523112
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,4,128,1,float16,fp8,0,10.138442357381185
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,1,128,1,float16,float16,0,4.995029449462891
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,4,128,1,fp8,fp8,0,6.857199986775716
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,8,128,1,float16,float16,0,10.234170913696289
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,24,128,1,float16,fp8,0,5.051919937133789
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,24,128,1,float16,float16,0,4.867402712504069
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,8,128,1,fp8,fp8,0,6.8219146728515625
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,1,128,1,float16,fp8,0,5.006362597147624
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,24,128,1,fp8,fp8,0,3.3262081146240234
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,1,128,1,fp8,fp8,0,3.252885182698568
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,24,8,128,1,float16,fp8,0,10.094517389933268
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,2,128,1,float16,float16,0,4.804160118103027
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,2,128,1,float16,fp8,0,4.861573219299316
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,2,128,1,fp8,fp8,0,3.253039995829264
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,4,128,1,float16,fp8,0,5.079733212788899
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,4,128,1,float16,float16,0,4.835946718851726
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,4,128,1,fp8,fp8,0,3.272261301676432
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,8,128,1,float16,float16,0,5.063167889912923
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,1,128,1,float16,float16,0,2.513136068979899
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,1,128,1,float16,fp8,0,2.4365439414978027
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,8,128,1,float16,fp8,0,5.071455955505371
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,24,128,1,float16,float16,0,2.514992078145345
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,1,128,1,fp8,fp8,0,1.704202651977539
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,24,8,128,1,fp8,fp8,0,3.2881971995035806
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,24,128,1,float16,fp8,0,2.4434293111165366
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,24,128,1,fp8,fp8,0,1.7282932599385579
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,2,128,1,float16,float16,0,2.5005332628885903
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,2,128,1,float16,fp8,0,2.4384586016337075
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,2,128,1,fp8,fp8,0,1.7054293950398762
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,4,128,1,float16,float16,0,2.5173439979553223
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,4,128,1,float16,fp8,0,2.4681065877278647
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,4,128,1,fp8,fp8,0,1.7134346961975098
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,8,128,1,float16,fp8,0,2.491408030192057
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,24,128,1,float16,float16,0,1.3029279708862305
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,8,128,1,float16,float16,0,2.4991520245869956
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,24,8,128,1,fp8,fp8,0,1.7356266975402832
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,24,128,1,float16,fp8,0,1.3060373465220134
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,1,128,1,float16,float16,0,1.340138594309489
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,1,128,1,float16,fp8,0,1.3205119768778484
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,24,128,1,fp8,fp8,0,0.9230773448944092
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,1,128,1,fp8,fp8,0,0.8972799777984619
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,2,128,1,float16,fp8,0,1.3252747058868408
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,2,128,1,fp8,fp8,0,0.9021920363108317
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,2,128,1,float16,float16,0,1.3331573009490967
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,4,128,1,float16,float16,0,1.339296023050944
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,4,128,1,float16,fp8,0,1.3296106656392415
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,4,128,1,fp8,fp8,0,0.9003360271453857
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,8,128,1,float16,float16,0,1.3571093877156575
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,8,128,1,float16,fp8,0,1.3404426574707031
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,24,8,128,1,fp8,fp8,0,0.9111999670664469
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,1,128,1,float16,float16,0,13.527386983235678
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,1,128,1,float16,fp8,0,13.118896484375
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,1,128,1,fp8,fp8,0,8.887930552164713
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,2,128,1,float16,fp8,0,13.374245961507162
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,2,128,1,fp8,fp8,0,8.87767473856608
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,2,128,1,float16,float16,0,13.282928466796875
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,4,128,1,float16,fp8,0,13.540042877197266
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,4,128,1,float16,float16,0,13.440704345703125
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,1,128,1,float16,float16,0,6.713413238525391
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,4,128,1,fp8,fp8,0,9.077866872151693
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,8,128,1,float16,float16,0,13.321301778157553
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,24,128,1,float16,float16,0,6.577562967936198
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,24,128,1,float16,fp8,0,6.569679896036784
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,8,128,1,float16,fp8,0,13.495557149251303
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,1,128,1,float16,fp8,0,6.452250798543294
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,24,8,128,1,fp8,fp8,0,9.112106959025065
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,1,128,1,fp8,fp8,0,4.365104039510091
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,24,128,1,fp8,fp8,0,4.37606938680013
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,2,128,1,float16,float16,0,6.483968098958333
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,2,128,1,fp8,fp8,0,4.363402684529622
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,2,128,1,float16,fp8,0,6.691098531087239
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,4,128,1,float16,float16,0,6.702538808186849
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,4,128,1,float16,fp8,0,6.724357604980469
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,4,128,1,fp8,fp8,0,4.3033599853515625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,8,128,1,float16,float16,0,6.894938786824544
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,1,128,1,float16,float16,0,3.202138582865397
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,8,128,1,float16,fp8,0,6.678607940673828
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,1,128,1,float16,fp8,0,3.1935946146647134
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,1,128,1,fp8,fp8,0,2.2033653259277344
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,24,128,1,float16,float16,0,3.1701653798421225
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,24,8,128,1,fp8,fp8,0,4.433962821960449
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,2,128,1,float16,float16,0,3.2376960118611655
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,24,128,1,float16,fp8,0,3.1876052220662436
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,2,128,1,float16,fp8,0,3.285445213317871
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,2,128,1,fp8,fp8,0,2.1973066329956055
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,24,128,1,fp8,fp8,0,2.254021326700846
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,4,128,1,float16,float16,0,3.271946589152018
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,4,128,1,float16,fp8,0,3.169621467590332
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,4,128,1,fp8,fp8,0,2.207909266153971
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,8,128,1,float16,float16,0,3.230714797973633
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,24,128,1,float16,float16,0,1.64738130569458
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,24,128,1,float16,fp8,0,1.6430613199869792
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,8,128,1,fp8,fp8,0,2.2383039792378745
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,24,8,128,1,float16,fp8,0,3.2763147354125977
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,1,128,1,float16,float16,0,1.6695733070373535
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,1,128,1,float16,fp8,0,1.6374133427937825
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,1,128,1,fp8,fp8,0,1.1697279612223308
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,24,128,1,fp8,fp8,0,1.1937226454416912
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,2,128,1,fp8,fp8,0,1.1699679692586262
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,2,128,1,float16,fp8,0,1.6467466354370117
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,2,128,1,float16,float16,0,1.6673973401387532
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,4,128,1,float16,float16,0,1.6871466636657715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,4,128,1,float16,fp8,0,1.6590026219685872
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,4,128,1,fp8,fp8,0,1.1742773056030273
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,1,128,1,float16,float16,0,0.918837308883667
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,8,128,1,float16,float16,0,1.6897652943929036
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,8,128,1,float16,fp8,0,1.675487995147705
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,1,128,1,float16,fp8,0,0.9005066553751627
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,24,128,1,float16,float16,0,0.9023839632670084
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,24,128,1,float16,fp8,0,0.8936800161997477
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,24,8,128,1,fp8,fp8,0,1.1850132942199707
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,24,128,1,fp8,fp8,0,0.640176018079122
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,1,128,1,fp8,fp8,0,0.6212693452835083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,2,128,1,float16,float16,0,0.9201066493988037
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,2,128,1,float16,fp8,0,0.9104373455047607
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,2,128,1,fp8,fp8,0,0.6227946678797404
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,4,128,1,float16,fp8,0,0.9057333469390869
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,4,128,1,fp8,fp8,0,0.6230239868164062
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,4,128,1,float16,float16,0,0.9190719922383627
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,8,128,1,float16,float16,0,0.926741361618042
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,8,128,1,float16,fp8,0,0.9128639698028564
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,24,8,128,1,fp8,fp8,0,0.6315413316090902
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,1,128,1,fp8,fp8,0,5.283189455668132
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,1,128,1,float16,float16,0,7.934421539306641
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,1,128,1,float16,fp8,0,7.703903834025065
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,2,128,1,float16,float16,0,8.010394414265951
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,2,128,1,fp8,fp8,0,5.356138865152995
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,2,128,1,float16,fp8,0,7.739946365356445
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,4,128,1,float16,float16,0,7.949013392130534
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,4,128,1,float16,fp8,0,7.861722946166992
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,1,128,1,float16,float16,0,3.740426699320475
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,4,128,1,fp8,fp8,0,5.334298451741536
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,8,128,1,float16,float16,0,7.985861460367839
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,8,128,1,float16,fp8,0,8.01305071512858
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,1,128,1,float16,fp8,0,3.6964852015177407
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,24,128,1,float16,fp8,0,3.706378618876139
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,24,8,128,1,fp8,fp8,0,5.5518239339192705
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,24,128,1,float16,float16,0,3.7963892618815103
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,24,128,1,fp8,fp8,0,2.7086559931437173
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,1,128,1,fp8,fp8,0,2.603381315867106
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,2,128,1,float16,float16,0,3.8868799209594727
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,2,128,1,float16,fp8,0,3.7803465525309243
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,2,128,1,fp8,fp8,0,2.623018741607666
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,4,128,1,float16,float16,0,3.887434641520182
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,4,128,1,float16,fp8,0,3.878602663675944
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,4,128,1,fp8,fp8,0,2.6572853724161782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,8,128,1,float16,float16,0,3.7660319010416665
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,8,128,1,float16,fp8,0,3.8521013259887695
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,1,128,1,float16,float16,0,1.913856029510498
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,1,128,1,float16,fp8,0,1.9058133761088054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,24,128,1,float16,fp8,0,1.924890677134196
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,24,128,1,float16,float16,0,1.9015146891276042
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,1,128,1,fp8,fp8,0,1.3545066515604656
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,24,8,128,1,fp8,fp8,0,2.677349408467611
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,2,128,1,float16,float16,0,1.9103412628173828
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,2,128,1,float16,fp8,0,1.893664042154948
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,2,128,1,fp8,fp8,0,1.3610399564107258
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,24,128,1,fp8,fp8,0,1.4062933921813965
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,4,128,1,fp8,fp8,0,1.3645280202229817
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,4,128,1,float16,float16,0,1.9350080490112305
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,4,128,1,float16,fp8,0,1.8863412539164226
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,8,128,1,float16,float16,0,1.948021411895752
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,8,128,1,float16,fp8,0,1.9076159795125325
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,24,128,1,float16,float16,0,1.0233813126881917
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,24,128,1,float16,fp8,0,1.0062452952067058
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,24,8,128,1,fp8,fp8,0,1.3821333249409993
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,1,128,1,float16,float16,0,1.0232480367024739
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,1,128,1,float16,fp8,0,1.0002293586730957
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,1,128,1,fp8,fp8,0,0.7354239622751871
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,2,128,1,float16,float16,0,1.0198079744974773
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,2,128,1,float16,fp8,0,1.013055960337321
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,2,128,1,fp8,fp8,0,0.7349653244018555
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,24,128,1,fp8,fp8,0,0.7509173552195231
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,4,128,1,float16,float16,0,1.0217119852701824
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,4,128,1,float16,fp8,0,1.0144106547037761
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,4,128,1,fp8,fp8,0,0.7387039661407471
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,8,128,1,float16,float16,0,1.0323039690653484
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,1,128,1,float16,float16,0,0.5740106503168741
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,8,128,1,float16,fp8,0,1.0120853583017986
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,1,128,1,float16,fp8,0,0.564138650894165
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,24,128,1,float16,float16,0,0.5651573340098063
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,24,8,128,1,fp8,fp8,0,0.7487786610921224
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,24,128,1,float16,fp8,0,0.5638399918874105
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,24,128,1,fp8,fp8,0,0.4059520165125529
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,1,128,1,fp8,fp8,0,0.3962719837824504
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,2,128,1,float16,float16,0,0.5752960046132406
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,2,128,1,fp8,fp8,0,0.3951733509699504
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,2,128,1,float16,fp8,0,0.5646560192108154
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,4,128,1,float16,fp8,0,0.5701546669006348
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,4,128,1,float16,float16,0,0.5761280059814453
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,4,128,1,fp8,fp8,0,0.399616003036499
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,8,128,1,float16,float16,0,0.5760533412297567
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,8,128,1,float16,fp8,0,0.5735146601994833
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,24,8,128,1,fp8,fp8,0,0.4013599952061971
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,1,128,1,float16,float16,0,7.619354883829753
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,1,128,1,float16,fp8,0,7.543237050374349
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,1,128,1,fp8,fp8,0,5.24073600769043
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,2,128,1,float16,fp8,0,7.55135981241862
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,2,128,1,fp8,fp8,0,5.300607999165853
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,2,128,1,float16,float16,0,7.503642400105794
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,4,128,1,float16,float16,0,7.798671722412109
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,4,128,1,float16,fp8,0,7.798730850219727
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,1,128,1,float16,float16,0,3.729882558186849
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,4,128,1,fp8,fp8,0,5.255551973978679
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,24,128,1,float16,float16,0,3.8324693044026694
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,8,128,1,float16,float16,0,7.65290641784668
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,1,128,1,float16,fp8,0,3.586341222127279
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,8,128,1,float16,fp8,0,7.763935724894206
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,24,8,128,1,fp8,fp8,0,5.462869644165039
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,24,128,1,float16,fp8,0,3.6517120997111
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,24,128,1,fp8,fp8,0,2.797077178955078
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,1,128,1,fp8,fp8,0,2.621514638264974
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,2,128,1,float16,float16,0,3.6417547861735025
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,2,128,1,float16,fp8,0,3.6770188013712564
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,2,128,1,fp8,fp8,0,2.642730712890625
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,4,128,1,float16,float16,0,3.7337706883748374
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,4,128,1,float16,fp8,0,3.597087860107422
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,4,128,1,fp8,fp8,0,2.657573382059733
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,8,128,1,float16,float16,0,3.815066655476888
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,8,128,1,float16,fp8,0,3.5977760950724282
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,1,128,1,float16,float16,0,1.8438827196757
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,1,128,1,float16,fp8,0,1.808069388071696
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,1,128,1,fp8,fp8,0,1.3423253695170085
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,24,128,1,float16,float16,0,1.8701653480529785
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,24,128,1,float16,fp8,0,1.8764959971110027
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,2,128,1,float16,float16,0,1.8356107076009114
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,24,8,128,1,fp8,fp8,0,2.70362122853597
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,2,128,1,float16,fp8,0,1.8178079922993977
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,2,128,1,fp8,fp8,0,1.3465813000996907
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,24,128,1,fp8,fp8,0,1.4364426930745442
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,4,128,1,float16,float16,0,1.8636213938395183
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,4,128,1,float16,fp8,0,1.8235360781351726
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,4,128,1,fp8,fp8,0,1.3595412572224934
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,8,128,1,float16,float16,0,1.856869379679362
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,8,128,1,float16,fp8,0,1.845706621805827
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,1,128,1,float16,float16,0,0.9607786337534586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,1,128,1,float16,fp8,0,0.951573371887207
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,24,128,1,float16,float16,0,0.9732586542765299
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,24,8,128,1,fp8,fp8,0,1.385248025258382
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,24,128,1,float16,fp8,0,0.9790293375651041
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,1,128,1,fp8,fp8,0,0.7088373502095541
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,24,128,1,fp8,fp8,0,0.7582186857859293
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,2,128,1,float16,float16,0,0.9647093613942465
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,2,128,1,float16,fp8,0,0.947930653889974
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,2,128,1,fp8,fp8,0,0.7130933602650961
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,4,128,1,float16,fp8,0,0.9559253056844076
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,4,128,1,float16,float16,0,0.9678080081939697
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,4,128,1,fp8,fp8,0,0.721407969792684
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,8,128,1,float16,float16,0,0.9729333718617758
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,8,128,1,float16,fp8,0,0.9681173165639242
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,24,128,1,float16,float16,0,0.5270453294118246
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,24,8,128,1,fp8,fp8,0,0.7306773662567139
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,24,128,1,float16,fp8,0,0.5273439884185791
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,1,128,1,float16,float16,0,0.5268586476643881
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,24,128,1,fp8,fp8,0,0.4065013329188029
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,1,128,1,fp8,fp8,0,0.37645332018534344
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,2,128,1,float16,float16,0,0.5317440032958984
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,1,128,1,float16,fp8,0,0.5188479820887247
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,2,128,1,float16,fp8,0,0.5187520186106364
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,4,128,1,float16,float16,0,0.5316853523254395
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,2,128,1,fp8,fp8,0,0.3784266710281372
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,4,128,1,float16,fp8,0,0.522218664487203
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,4,128,1,fp8,fp8,0,0.3824053208033244
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,8,128,1,float16,float16,0,0.5319786469141642
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,8,128,1,float16,fp8,0,0.5280373493830363
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,1,128,1,float16,float16,0,0.2756693363189697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,24,128,1,float16,float16,0,0.2804479996363322
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,24,8,128,1,fp8,fp8,0,0.38809065024058026
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,1,128,1,float16,fp8,0,0.27135999997456867
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,24,128,1,float16,fp8,0,0.2791680097579956
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,24,128,1,fp8,fp8,0,0.2281173268953959
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,1,128,1,fp8,fp8,0,0.21873066822687784
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,2,128,1,float16,float16,0,0.2777013381322225
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,2,128,1,float16,fp8,0,0.27038933833440143
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,4,128,1,float16,fp8,0,0.27320533990859985
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,2,128,1,fp8,fp8,0,0.21853333711624146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,4,128,1,fp8,fp8,0,0.21995733181635538
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,4,128,1,float16,float16,0,0.2789439956347148
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,8,128,1,float16,float16,0,0.27825067440668744
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,8,128,1,float16,fp8,0,0.2735466758410136
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,24,8,128,1,fp8,fp8,0,0.22510933876037598
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,1,128,1,float16,float16,0,4.507055918375651
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,1,128,1,fp8,fp8,0,3.3089812596639
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,1,128,1,float16,fp8,0,4.514474550882976
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,2,128,1,float16,float16,0,4.608522733052571
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,2,128,1,float16,fp8,0,4.413471857706706
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,2,128,1,fp8,fp8,0,3.347536087036133
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,4,128,1,float16,float16,0,4.482746760050456
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,4,128,1,float16,fp8,0,4.535264015197754
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,1,128,1,float16,float16,0,2.2453014055887857
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,4,128,1,fp8,fp8,0,3.3476905822753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,1,128,1,float16,fp8,0,2.201279958089193
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,24,128,1,float16,float16,0,2.325167973836263
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,8,128,1,float16,float16,0,4.665333429972331
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,24,128,1,float16,fp8,0,2.2841386795043945
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,8,128,1,float16,fp8,0,4.6128800710042315
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,24,128,1,fp8,fp8,0,1.7916000684102376
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,24,8,128,1,fp8,fp8,0,3.4168853759765625
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,1,128,1,fp8,fp8,0,1.6666560173034668
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,2,128,1,float16,float16,0,2.2505973180135093
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,2,128,1,fp8,fp8,0,1.6731467247009277
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,2,128,1,float16,fp8,0,2.211695988972982
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,4,128,1,float16,float16,0,2.2552587191263833
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,4,128,1,float16,fp8,0,2.2108586629231772
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,4,128,1,fp8,fp8,0,1.6929972966512044
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,8,128,1,float16,float16,0,2.284778594970703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,1,128,1,float16,float16,0,1.1495412985483806
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,8,128,1,float16,fp8,0,2.2308533986409507
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,1,128,1,float16,fp8,0,1.1228906313578289
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,1,128,1,fp8,fp8,0,0.8621866703033447
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,24,128,1,float16,float16,0,1.1749013264973958
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,24,8,128,1,fp8,fp8,0,1.7340586980183919
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,2,128,1,float16,float16,0,1.1536586284637451
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,24,128,1,fp8,fp8,0,0.9300106366475424
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,2,128,1,float16,fp8,0,1.1270879904429119
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,2,128,1,fp8,fp8,0,0.8651519616444906
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,24,128,1,float16,fp8,0,1.1803733507792156
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,4,128,1,float16,float16,0,1.1599253018697102
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,4,128,1,float16,fp8,0,1.1344906489054363
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,4,128,1,fp8,fp8,0,0.8781332969665527
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,8,128,1,float16,float16,0,1.1677333513895671
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,8,128,1,float16,fp8,0,1.1472533543904622
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,24,8,128,1,fp8,fp8,0,0.8906773726145426
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,24,128,1,float16,float16,0,0.6159306764602661
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,1,128,1,float16,float16,0,0.6093066533406576
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,1,128,1,float16,fp8,0,0.6003359953562418
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,24,128,1,float16,fp8,0,0.6207253138224283
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,1,128,1,fp8,fp8,0,0.46561598777770996
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,24,128,1,fp8,fp8,0,0.5007733503977457
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,2,128,1,float16,fp8,0,0.5996640125910441
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,2,128,1,float16,float16,0,0.6095893383026123
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,2,128,1,fp8,fp8,0,0.46616534392038983
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,4,128,1,float16,float16,0,0.6136480172475179
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,4,128,1,float16,fp8,0,0.6061439911524454
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,4,128,1,fp8,fp8,0,0.47244266668955487
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,8,128,1,float16,float16,0,0.6201546589533488
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,1,128,1,float16,float16,0,0.3420480092366536
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,8,128,1,float16,fp8,0,0.6108213265736898
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,24,128,1,float16,float16,0,0.34411199887593585
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,24,128,1,float16,fp8,0,0.33992000420888263
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,24,8,128,1,fp8,fp8,0,0.4794880151748657
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,24,128,1,fp8,fp8,0,0.26977066198984784
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,1,128,1,float16,fp8,0,0.3348960081736247
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,1,128,1,fp8,fp8,0,0.25006399552027386
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,2,128,1,float16,float16,0,0.3440053462982178
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,2,128,1,float16,fp8,0,0.33534399668375653
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,4,128,1,float16,float16,0,0.34304531415303546
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,4,128,1,float16,fp8,0,0.3370399872461955
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,4,128,1,fp8,fp8,0,0.25251199801762897
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,2,128,1,fp8,fp8,0,0.24941333134969076
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,8,128,1,float16,fp8,0,0.34196265538533527
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,8,128,1,float16,float16,0,0.34730132420857746
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,24,8,128,1,fp8,fp8,0,0.2577280004819234
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,24,128,1,float16,float16,0,0.18754667043685913
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,1,128,1,float16,float16,0,0.18155733744303384
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,24,128,1,float16,fp8,0,0.182970662911733
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,24,128,1,fp8,fp8,0,0.15690132975578308
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,1,128,1,fp8,fp8,0,0.14531733592351279
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,1,128,1,float16,fp8,0,0.17892799774805704
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,2,128,1,float16,fp8,0,0.18067733446756998
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,2,128,1,float16,float16,0,0.18281600872675577
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,2,128,1,fp8,fp8,0,0.14713600277900696
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,4,128,1,float16,float16,0,0.18285866578420004
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,4,128,1,float16,fp8,0,0.17921066284179688
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,4,128,1,fp8,fp8,0,0.14934399724006653
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,8,128,1,float16,float16,0,0.1857759952545166
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,8,128,1,fp8,fp8,0,0.15271466970443726
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,24,8,128,1,float16,fp8,0,0.18099733193715414
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,1,128,1,fp8,fp8,0,3.617018699645996
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,1,128,1,float16,fp8,0,4.531402587890625
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,1,128,1,float16,float16,0,4.739141464233398
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,2,128,1,float16,float16,0,4.73195743560791
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,2,128,1,float16,fp8,0,4.647215843200684
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,2,128,1,fp8,fp8,0,3.6429386138916016
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,4,128,1,float16,float16,0,4.802309354146321
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,4,128,1,float16,fp8,0,4.775520006815593
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,1,128,1,float16,float16,0,2.329616069793701
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,4,128,1,fp8,fp8,0,3.684864044189453
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,24,128,1,float16,float16,0,2.4498133659362793
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,24,128,1,float16,fp8,0,2.398325284322103
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,8,128,1,float16,float16,0,4.846096038818359
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,1,128,1,float16,fp8,0,2.2622987429300943
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,8,128,1,fp8,fp8,0,3.7489226659139
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,24,128,1,fp8,fp8,0,2.0104853312174478
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,24,8,128,1,float16,fp8,0,4.691424051920573
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,1,128,1,fp8,fp8,0,1.8238239288330078
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,2,128,1,float16,float16,0,2.3392799695332847
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,2,128,1,float16,fp8,0,2.271941343943278
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,2,128,1,fp8,fp8,0,1.830501397450765
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,4,128,1,float16,fp8,0,2.278362592061361
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,4,128,1,float16,float16,0,2.3669652938842773
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,4,128,1,fp8,fp8,0,1.8524959882100422
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,8,128,1,float16,float16,0,2.397813320159912
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,8,128,1,float16,fp8,0,2.3110559781392417
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,1,128,1,float16,fp8,0,1.1511092980702717
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,1,128,1,float16,float16,0,1.1725707054138184
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,24,128,1,float16,fp8,0,1.2244640191396077
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,24,128,1,float16,float16,0,1.2334293524424236
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,24,8,128,1,fp8,fp8,0,1.889296054840088
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,1,128,1,fp8,fp8,0,0.9254079659779867
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,24,128,1,fp8,fp8,0,1.0218346913655598
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,2,128,1,float16,float16,0,1.1752639611562092
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,2,128,1,float16,fp8,0,1.1546346346537273
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,2,128,1,fp8,fp8,0,0.9262293179829916
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,4,128,1,float16,float16,0,1.1880746682484944
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,4,128,1,float16,fp8,0,1.1627946694691975
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,4,128,1,fp8,fp8,0,0.937818686167399
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,8,128,1,float16,float16,0,1.20359468460083
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,1,128,1,float16,float16,0,0.6170560121536255
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,8,128,1,fp8,fp8,0,0.9585119883219401
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,24,8,128,1,float16,fp8,0,1.1732213497161865
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,1,128,1,float16,fp8,0,0.6040319999059042
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,24,128,1,float16,float16,0,0.6416800022125244
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,24,128,1,float16,fp8,0,0.6305226484934489
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,24,128,1,fp8,fp8,0,0.5374826590220133
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,2,128,1,float16,float16,0,0.619269331296285
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,2,128,1,float16,fp8,0,0.6007786591847738
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,2,128,1,fp8,fp8,0,0.4877013365427653
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,4,128,1,float16,fp8,0,0.6090240081151327
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,4,128,1,float16,float16,0,0.6199146509170532
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,1,128,1,fp8,fp8,0,0.488597313563029
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,4,128,1,fp8,fp8,0,0.49266668160756427
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,8,128,1,float16,fp8,0,0.6181759834289551
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,8,128,1,fp8,fp8,0,0.5034613211949667
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,24,128,1,float16,float16,0,0.34249067306518555
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,24,8,128,1,float16,float16,0,0.6281599998474121
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,24,128,1,fp8,fp8,0,0.293178657690684
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,1,128,1,float16,float16,0,0.3343679904937744
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,24,128,1,float16,fp8,0,0.34281599521636963
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,1,128,1,float16,fp8,0,0.32386134068171185
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,1,128,1,fp8,fp8,0,0.2584106723467509
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,2,128,1,float16,float16,0,0.33372267087300617
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,2,128,1,float16,fp8,0,0.32630399862925213
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,2,128,1,fp8,fp8,0,0.25963733593622845
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,4,128,1,float16,float16,0,0.336677352587382
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,4,128,1,fp8,fp8,0,0.2613333264986674
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,4,128,1,float16,fp8,0,0.3303626577059428
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,1,128,1,float16,float16,0,0.17761600017547607
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,24,128,1,float16,float16,0,0.18593599398930868
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,8,128,1,fp8,fp8,0,0.26629332701365155
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,24,128,1,float16,fp8,0,0.1848533352216085
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,8,128,1,float16,float16,0,0.3384000062942505
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,24,128,1,fp8,fp8,0,0.1595093309879303
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,1,128,1,float16,fp8,0,0.17316800355911255
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,24,8,128,1,float16,fp8,0,0.3354293505350749
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,1,128,1,fp8,fp8,0,0.14681599537531534
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,2,128,1,float16,fp8,0,0.1720693310101827
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,2,128,1,float16,float16,0,0.17706666390101114
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,2,128,1,fp8,fp8,0,0.14576533436775208
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,4,128,1,float16,fp8,0,0.17408533891042074
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,4,128,1,float16,float16,0,0.17748800913492838
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,8,128,1,fp8,fp8,0,0.1509866714477539
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,4,128,1,fp8,fp8,0,0.148799995581309
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,24,128,1,float16,float16,0,0.10904533664385478
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,8,128,1,float16,fp8,0,0.17721066872278848
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,24,8,128,1,float16,float16,0,0.18100800116856894
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,24,128,1,float16,fp8,0,0.10843732953071594
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,1,128,1,float16,float16,0,0.10570133725802104
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,1,128,1,float16,fp8,0,0.10430933038393657
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,2,128,1,float16,float16,0,0.10583999752998352
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,2,128,1,float16,fp8,0,0.10512000322341919
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,24,128,1,fp8,fp8,0,0.09826667110125224
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,2,128,1,fp8,fp8,0,0.08824533224105835
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,4,128,1,float16,float16,0,0.10637866457303365
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,4,128,1,fp8,fp8,0,0.08879466851552327
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,1,128,1,fp8,fp8,0,0.088128000497818
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,8,128,1,float16,float16,0,0.10797866185506184
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,4,128,1,float16,fp8,0,0.10469333330790202
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,8,128,1,float16,fp8,0,0.10679466525713603
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,24,8,128,1,fp8,fp8,0,0.09283199906349182
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,1,128,1,float16,fp8,0,2.9010613759358725
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,1,128,1,fp8,fp8,0,2.428335984547933
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,2,128,1,float16,fp8,0,2.9043518702189126
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,2,128,1,float16,float16,0,3.038357416788737
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,1,128,1,float16,float16,0,3.0181652704874673
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,2,128,1,fp8,fp8,0,2.444965362548828
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,4,128,1,float16,float16,0,3.0781920750935874
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,4,128,1,float16,fp8,0,2.939023971557617
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,1,128,1,float16,float16,0,1.5058612823486328
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,4,128,1,fp8,fp8,0,2.4934773445129395
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,8,128,1,float16,float16,0,3.0910666783650718
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,8,128,1,float16,fp8,0,2.981130599975586
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,24,128,1,fp8,fp8,0,1.3795305887858074
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,24,128,1,float16,fp8,0,1.57206392288208
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,1,128,1,float16,fp8,0,1.4590293566385906
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,24,8,128,1,fp8,fp8,0,2.551392078399658
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,1,128,1,fp8,fp8,0,1.224293311436971
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,24,128,1,float16,float16,0,1.6049866676330566
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,2,128,1,float16,float16,0,1.5110026995340984
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,2,128,1,float16,fp8,0,1.461535930633545
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,2,128,1,fp8,fp8,0,1.2352480093638103
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,4,128,1,float16,float16,0,1.522271951039632
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,4,128,1,fp8,fp8,0,1.2592906951904297
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,4,128,1,float16,fp8,0,1.4762560526529949
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,8,128,1,float16,fp8,0,1.510309378306071
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,8,128,1,float16,float16,0,1.5472745895385742
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,1,128,1,float16,float16,0,0.7701653639475504
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,1,128,1,float16,fp8,0,0.7478666305541992
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,24,128,1,float16,float16,0,0.8084373474121094
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,1,128,1,fp8,fp8,0,0.6294240156809489
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,24,128,1,float16,fp8,0,0.8074506918589274
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,24,8,128,1,fp8,fp8,0,1.280997355779012
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,2,128,1,float16,float16,0,0.7722133000691732
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,2,128,1,float16,fp8,0,0.7530240217844645
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,24,128,1,fp8,fp8,0,0.7081546783447266
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,2,128,1,fp8,fp8,0,0.6320426861445109
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,4,128,1,float16,float16,0,0.776426633199056
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,4,128,1,float16,fp8,0,0.759669303894043
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,4,128,1,fp8,fp8,0,0.6449919939041138
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,8,128,1,float16,float16,0,0.7843573093414307
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,8,128,1,float16,fp8,0,0.7723146279652914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,1,128,1,float16,fp8,0,0.39715198675791424
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,1,128,1,float16,float16,0,0.4074079990386963
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,24,8,128,1,fp8,fp8,0,0.6614506642023722
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,1,128,1,fp8,fp8,0,0.33371734619140625
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,24,128,1,float16,float16,0,0.4270079930623372
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,2,128,1,float16,float16,0,0.40944000085194904
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,24,128,1,fp8,fp8,0,0.3752266565958659
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,24,128,1,float16,fp8,0,0.42160534858703613
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,2,128,1,float16,fp8,0,0.3998719851175944
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,2,128,1,fp8,fp8,0,0.3375786542892456
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,4,128,1,float16,float16,0,0.4124106566111247
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,4,128,1,float16,fp8,0,0.4013013442357381
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,8,128,1,float16,float16,0,0.4162079890569051
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,4,128,1,fp8,fp8,0,0.34114666779836017
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,8,128,1,float16,fp8,0,0.4065386851628621
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,24,128,1,float16,float16,0,0.23595199982325235
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,1,128,1,float16,float16,0,0.22330133120218912
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,24,8,128,1,fp8,fp8,0,0.3484906752904256
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,24,128,1,float16,fp8,0,0.23283199469248453
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,1,128,1,float16,fp8,0,0.21876267592112222
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,24,128,1,fp8,fp8,0,0.2036799987157186
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,2,128,1,float16,float16,0,0.22390933831532797
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,1,128,1,fp8,fp8,0,0.17786133289337158
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,2,128,1,fp8,fp8,0,0.17858133713404337
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,2,128,1,float16,fp8,0,0.22059200207392374
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,4,128,1,float16,fp8,0,0.2225333253542582
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,4,128,1,fp8,fp8,0,0.18115200599034628
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,4,128,1,float16,float16,0,0.2266026735305786
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,8,128,1,float16,float16,0,0.2301386594772339
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,8,128,1,float16,fp8,0,0.22402666012446085
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,24,128,1,float16,float16,0,0.12870933612187704
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,24,128,1,float16,fp8,0,0.12851199507713318
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,1,128,1,float16,float16,0,0.12054399649302165
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,24,8,128,1,fp8,fp8,0,0.18614933888117471
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,24,128,1,fp8,fp8,0,0.11585600177447002
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,1,128,1,fp8,fp8,0,0.10218133529027303
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,2,128,1,float16,float16,0,0.12123733758926392
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,1,128,1,float16,fp8,0,0.11774933338165283
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,2,128,1,float16,fp8,0,0.1188266674677531
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,2,128,1,fp8,fp8,0,0.1042080024878184
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,4,128,1,float16,float16,0,0.12296000123023987
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,4,128,1,fp8,fp8,0,0.10588266452153523
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,8,128,1,float16,float16,0,0.12404800454775493
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,8,128,1,float16,fp8,0,0.12153599659601848
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,4,128,1,float16,fp8,0,0.11978666981061299
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,1,128,1,float16,float16,0,0.07618133227030437
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,24,128,1,float16,float16,0,0.07884266475836436
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,24,128,1,float16,fp8,0,0.07867200175921123
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,1,128,1,float16,fp8,0,0.07506133119265239
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,24,8,128,1,fp8,fp8,0,0.10921600461006165
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,24,128,1,fp8,fp8,0,0.07179200152556102
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,1,128,1,fp8,fp8,0,0.06498133142789204
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,2,128,1,float16,float16,0,0.07576533158620198
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,2,128,1,float16,fp8,0,0.07468800246715546
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,2,128,1,fp8,fp8,0,0.06526400148868561
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,4,128,1,float16,float16,0,0.07617599765459697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,4,128,1,float16,fp8,0,0.0749066670735677
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,8,128,1,float16,float16,0,0.07634666562080383
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,8,128,1,float16,fp8,0,0.07533333202203114
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,4,128,1,fp8,fp8,0,0.06599999964237213
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,24,8,128,1,fp8,fp8,0,0.06670400003592174
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,1,128,1,fp8,fp8,0,2.4482080141703286
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,1,128,1,float16,fp8,0,2.8422187169392905
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,1,128,1,float16,float16,0,2.913263956705729
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,2,128,1,fp8,fp8,0,2.5518773396809897
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,2,128,1,float16,fp8,0,2.9712320963541665
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,4,128,1,float16,fp8,0,3.0958665211995444
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,4,128,1,float16,float16,0,3.0654026667277017
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,2,128,1,float16,float16,0,2.9601707458496094
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,1,128,1,float16,float16,0,1.4403893152872722
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,4,128,1,fp8,fp8,0,2.7763360341389975
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,8,128,1,float16,float16,0,3.095984141031901
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,8,128,1,fp8,fp8,0,2.784912109375
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,24,128,1,float16,float16,0,1.686031977335612
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,24,8,128,1,float16,fp8,0,3.136202812194824
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,24,128,1,float16,fp8,0,1.6306239763895671
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,24,128,1,fp8,fp8,0,1.4196052551269531
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,1,128,1,fp8,fp8,0,1.235200007756551
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,1,128,1,float16,fp8,0,1.434922695159912
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,2,128,1,float16,float16,0,1.4557867050170898
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,2,128,1,float16,fp8,0,1.4484160741170247
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,2,128,1,fp8,fp8,0,1.2881066799163818
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,4,128,1,float16,float16,0,1.5422080357869465
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,4,128,1,fp8,fp8,0,1.3906772931416829
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,4,128,1,float16,fp8,0,1.532277266184489
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,8,128,1,float16,float16,0,1.557685375213623
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,8,128,1,float16,fp8,0,1.5686453183492024
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,1,128,1,float16,float16,0,0.7341279983520508
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,1,128,1,float16,fp8,0,0.7304213047027588
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,1,128,1,fp8,fp8,0,0.6174933513005575
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,24,8,128,1,fp8,fp8,0,1.4083147048950195
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,24,128,1,float16,float16,0,0.8337600231170654
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,2,128,1,float16,float16,0,0.7435200214385986
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,24,128,1,float16,fp8,0,0.832581361134847
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,2,128,1,float16,fp8,0,0.7387253443400065
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,24,128,1,fp8,fp8,0,0.7165813446044922
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,4,128,1,float16,float16,0,0.7749653657277426
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,2,128,1,fp8,fp8,0,0.6477226813634237
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,4,128,1,float16,fp8,0,0.7611520290374756
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,4,128,1,fp8,fp8,0,0.7076319853464762
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,1,128,1,float16,float16,0,0.3808693488438924
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,8,128,1,float16,float16,0,0.7769013245900472
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,1,128,1,float16,fp8,0,0.37854933738708496
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,8,128,1,float16,fp8,0,0.7890240351359049
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,24,128,1,float16,float16,0,0.4283093214035034
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,24,128,1,float16,fp8,0,0.4209386507670085
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,1,128,1,fp8,fp8,0,0.31944000720977783
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,24,8,128,1,fp8,fp8,0,0.7133013407389323
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,2,128,1,float16,fp8,0,0.3829919894536336
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,2,128,1,float16,float16,0,0.3855786720911662
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,24,128,1,fp8,fp8,0,0.368010679880778
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,2,128,1,fp8,fp8,0,0.33513601620992023
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,4,128,1,float16,float16,0,0.3988000154495239
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,4,128,1,float16,fp8,0,0.3935519854227702
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,4,128,1,fp8,fp8,0,0.36532799402872723
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,8,128,1,float16,fp8,0,0.3986453215281169
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,8,128,1,fp8,fp8,0,0.3695840040842692
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,24,128,1,float16,float16,0,0.22563199202219644
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,24,128,1,float16,fp8,0,0.2228320042292277
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,24,8,128,1,float16,float16,0,0.40030932426452637
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,1,128,1,float16,float16,0,0.20229866107304892
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,1,128,1,float16,fp8,0,0.20211732387542725
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,24,128,1,fp8,fp8,0,0.19155200322469076
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,1,128,1,fp8,fp8,0,0.15681599577267966
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,2,128,1,float16,float16,0,0.20347734292348227
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,2,128,1,fp8,fp8,0,0.1644266645113627
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,2,128,1,float16,fp8,0,0.2032853364944458
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,4,128,1,float16,fp8,0,0.20879467328389487
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,4,128,1,fp8,fp8,0,0.17690134048461914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,4,128,1,float16,float16,0,0.20948266983032227
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,8,128,1,float16,float16,0,0.21477866172790527
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,8,128,1,float16,fp8,0,0.21101866165796915
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,24,128,1,float16,fp8,0,0.1207413375377655
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,1,128,1,float16,float16,0,0.10727999607721965
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,24,128,1,fp8,fp8,0,0.10129066308339436
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,24,128,1,float16,float16,0,0.12441600362459819
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,24,8,128,1,fp8,fp8,0,0.18374399344126383
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,1,128,1,float16,fp8,0,0.10760000348091125
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,1,128,1,fp8,fp8,0,0.08744000395139058
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,2,128,1,float16,float16,0,0.10821866989135742
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,2,128,1,float16,fp8,0,0.10846400260925293
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,2,128,1,fp8,fp8,0,0.08819199601809184
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,4,128,1,float16,float16,0,0.1120693286259969
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,4,128,1,float16,fp8,0,0.11116799712181091
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,4,128,1,fp8,fp8,0,0.09770666559537251
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,8,128,1,float16,fp8,0,0.11204800009727478
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,8,128,1,float16,float16,0,0.11297600467999776
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,24,8,128,1,fp8,fp8,0,0.097653329372406
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,1,128,1,float16,float16,0,0.0588266650835673
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,24,128,1,float16,float16,0,0.06547733147939046
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,1,128,1,float16,fp8,0,0.058549334605534874
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,24,128,1,fp8,fp8,0,0.05862399935722351
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,24,128,1,float16,fp8,0,0.06410666803518932
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,1,128,1,fp8,fp8,0,0.04975999891757965
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,2,128,1,float16,float16,0,0.05929600199063619
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,2,128,1,float16,fp8,0,0.05964266757170359
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,2,128,1,fp8,fp8,0,0.05091199775536855
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,4,128,1,float16,fp8,0,0.06099733213583628
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,4,128,1,fp8,fp8,0,0.0543039987484614
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,8,128,1,float16,float16,0,0.06137600044409434
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,8,128,1,float16,fp8,0,0.06128533184528351
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,4,128,1,float16,float16,0,0.060826669136683144
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,24,8,128,1,fp8,fp8,0,0.05600533386071523
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,24,128,1,float16,float16,0,0.03751466671625773
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,24,128,1,float16,fp8,0,0.03696533292531967
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,24,128,1,fp8,fp8,0,0.03472533325354258
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,1,128,1,float16,fp8,0,0.03538133452335993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,1,128,1,float16,float16,0,0.03516799956560135
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,1,128,1,fp8,fp8,0,0.03164800008138021
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,2,128,1,fp8,fp8,0,0.03200533241033554
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,4,128,1,float16,float16,0,0.035887998839219414
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,2,128,1,float16,fp8,0,0.03587199995915095
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,4,128,1,float16,fp8,0,0.036570665736993156
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,4,128,1,fp8,fp8,0,0.033861334125200905
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,8,128,1,float16,float16,0,0.03645866612593333
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,2,128,1,float16,float16,0,0.035402665535608925
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,8,128,1,float16,fp8,0,0.03669333209594091
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,24,8,128,1,fp8,fp8,0,0.03383466601371765
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,1,128,1,fp8,fp8,0,2.0542933146158853
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,1,128,1,float16,float16,0,2.2817920049031577
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,1,128,1,float16,fp8,0,2.259727954864502
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,2,128,1,float16,float16,0,2.3440319697062173
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,2,128,1,float16,fp8,0,2.325391928354899
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,2,128,1,fp8,fp8,0,2.127946694691976
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,4,128,1,float16,fp8,0,2.4192800521850586
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,4,128,1,float16,float16,0,2.448746681213379
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,1,128,1,float16,float16,0,1.1413280169169109
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,4,128,1,fp8,fp8,0,2.368266741434733
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,1,128,1,float16,fp8,0,1.1287413438161213
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,24,128,1,float16,float16,0,1.365221341451009
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,24,128,1,float16,fp8,0,1.317087968190511
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,8,128,1,float16,float16,0,2.4706239700317383
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,8,128,1,float16,fp8,0,2.4960427284240723
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,24,128,1,fp8,fp8,0,1.2211573123931885
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,24,8,128,1,fp8,fp8,0,2.389237403869629
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,1,128,1,fp8,fp8,0,1.0281919638315837
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,2,128,1,float16,float16,0,1.1713813145955403
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,2,128,1,float16,fp8,0,1.142095963160197
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,4,128,1,float16,fp8,0,1.2224640051523845
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,2,128,1,fp8,fp8,0,1.0699946880340576
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,4,128,1,float16,float16,0,1.2270293235778809
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,4,128,1,fp8,fp8,0,1.189296007156372
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,8,128,1,float16,float16,0,1.2425546646118164
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,8,128,1,float16,fp8,0,1.2491947015126545
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,1,128,1,float16,float16,0,0.5768053531646729
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,1,128,1,float16,fp8,0,0.5717973311742147
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,24,128,1,float16,float16,0,0.6827253500620524
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,1,128,1,fp8,fp8,0,0.5112053155899048
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,24,8,128,1,fp8,fp8,0,1.2114613056182861
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,2,128,1,float16,float16,0,0.5823306639989217
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,2,128,1,float16,fp8,0,0.5813173453013102
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,24,128,1,float16,fp8,0,0.6703306833902994
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,2,128,1,fp8,fp8,0,0.5409866571426392
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,24,128,1,fp8,fp8,0,0.6139146486918131
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,4,128,1,float16,float16,0,0.616320013999939
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,4,128,1,float16,fp8,0,0.6010239919026693
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,4,128,1,fp8,fp8,0,0.6052746772766113
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,1,128,1,float16,float16,0,0.2987520098686218
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,8,128,1,float16,fp8,0,0.6233706474304199
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,8,128,1,fp8,fp8,0,0.6053333282470703
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,1,128,1,float16,fp8,0,0.29869333902994794
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,24,8,128,1,float16,float16,0,0.6217600107192993
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,24,128,1,float16,float16,0,0.348906675974528
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,24,128,1,fp8,fp8,0,0.32019199927647907
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,1,128,1,fp8,fp8,0,0.26544533173243207
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,24,128,1,float16,fp8,0,0.33987200260162354
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,2,128,1,float16,float16,0,0.3019893368085225
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,2,128,1,float16,fp8,0,0.300709327061971
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,2,128,1,fp8,fp8,0,0.27577600876490277
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,4,128,1,float16,float16,0,0.31597334146499634
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,4,128,1,float16,fp8,0,0.309663991133372
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,4,128,1,fp8,fp8,0,0.3132266600926717
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,8,128,1,float16,float16,0,0.3179786602656047
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,8,128,1,float16,fp8,0,0.3130720059076945
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,24,8,128,1,fp8,fp8,0,0.3118293285369873
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,24,128,1,float16,float16,0,0.18569066127141318
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,24,128,1,float16,fp8,0,0.18248534202575684
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,1,128,1,float16,float16,0,0.15758933623631796
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,1,128,1,float16,fp8,0,0.15752533078193665
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,24,128,1,fp8,fp8,0,0.1639199952284495
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,1,128,1,fp8,fp8,0,0.13080533345540366
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,2,128,1,fp8,fp8,0,0.13372266292572021
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,2,128,1,float16,fp8,0,0.16006400187810263
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,2,128,1,float16,float16,0,0.1590559979279836
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,4,128,1,float16,float16,0,0.16586132844289145
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,4,128,1,float16,fp8,0,0.16335999965667725
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,4,128,1,fp8,fp8,0,0.15149333079655966
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,8,128,1,float16,fp8,0,0.16645866632461548
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,1,128,1,float16,float16,0,0.08715732892354329
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,8,128,1,float16,float16,0,0.16833599408467612
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,24,128,1,float16,float16,0,0.10511466860771179
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,24,8,128,1,fp8,fp8,0,0.15465066830317178
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,24,128,1,fp8,fp8,0,0.08796266714731853
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,1,128,1,float16,fp8,0,0.08718933661778767
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,24,128,1,float16,fp8,0,0.10361066460609436
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,1,128,1,fp8,fp8,0,0.07403733332951863
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,2,128,1,float16,float16,0,0.08830933769543965
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,2,128,1,fp8,fp8,0,0.07566933333873749
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,4,128,1,float16,float16,0,0.0911253293355306
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,4,128,1,float16,fp8,0,0.09027733405431111
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,4,128,1,fp8,fp8,0,0.08301866551240285
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,2,128,1,float16,fp8,0,0.08847999572753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,8,128,1,float16,float16,0,0.0923466682434082
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,8,128,1,float16,fp8,0,0.09133332967758179
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,24,8,128,1,fp8,fp8,0,0.08402666449546814
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,24,128,1,float16,float16,0,0.05748266478379568
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,24,128,1,float16,fp8,0,0.056015998125076294
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,24,128,1,fp8,fp8,0,0.05272533496220907
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,1,128,1,float16,float16,0,0.0490880012512207
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,1,128,1,float16,fp8,0,0.048901334404945374
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,1,128,1,fp8,fp8,0,0.04265599946180979
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,2,128,1,float16,float16,0,0.04927466809749603
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,2,128,1,float16,fp8,0,0.04939199984073639
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,4,128,1,float16,float16,0,0.051167999704678856
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,2,128,1,fp8,fp8,0,0.04420800010363261
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,4,128,1,float16,fp8,0,0.05077333251635233
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,4,128,1,fp8,fp8,0,0.04816000163555145
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,8,128,1,float16,float16,0,0.052202666799227394
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,8,128,1,float16,fp8,0,0.051962668697039284
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,24,128,1,float16,float16,0,0.0332640012105306
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,24,8,128,1,fp8,fp8,0,0.05022400120894114
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,24,128,1,float16,fp8,0,0.033770665526390076
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,1,128,1,float16,float16,0,0.030965333183606465
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,1,128,1,float16,fp8,0,0.031157332162062328
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,24,128,1,fp8,fp8,0,0.03215999901294708
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,2,128,1,float16,fp8,0,0.0316746657093366
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,4,128,1,float16,float16,0,0.032261334359645844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,2,128,1,float16,float16,0,0.031290667752424874
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,4,128,1,float16,fp8,0,0.03229333211978277
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,2,128,1,fp8,fp8,0,0.029909332593282063
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,4,128,1,fp8,fp8,0,0.03145066648721695
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,1,128,1,fp8,fp8,0,0.029477333029111225
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,8,128,1,float16,float16,0,0.032170665760835014
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,8,128,1,float16,fp8,0,0.03242666771014532
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,24,8,128,1,fp8,fp8,0,0.03188266605138779
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,1,128,1,float16,float16,0,0.025013332565625507
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,24,128,1,fp8,fp8,0,0.024773334463437397
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,24,128,1,float16,fp8,0,0.026261332134405773
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,24,128,1,float16,float16,0,0.02622399975856145
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,1,128,1,float16,fp8,0,0.025285333395004272
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,2,128,1,float16,float16,0,0.02510400116443634
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,2,128,1,fp8,fp8,0,0.02327466756105423
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,2,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,4,128,1,float16,float16,0,0.025781333446502686
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,4,128,1,float16,fp8,0,0.026165333886941273
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,1,128,1,fp8,fp8,0,0.022965334355831146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,4,128,1,fp8,fp8,0,0.024197332561016083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,8,128,1,float16,float16,0,0.025850666066010792
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,8,128,1,float16,fp8,0,0.0260959987839063
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,24,8,128,1,fp8,fp8,0,0.024634666740894318
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,1,128,1,float16,float16,0,0.9617813428243002
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,1,128,1,fp8,fp8,0,0.8586560090382894
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,1,128,1,float16,fp8,0,0.9568320115407308
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,2,128,1,fp8,fp8,0,0.8846027056376139
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,2,128,1,float16,fp8,0,0.9889973004659017
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,2,128,1,float16,float16,0,1.0079200267791748
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,4,128,1,float16,fp8,0,1.0617653528849285
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,4,128,1,float16,float16,0,1.0704426765441895
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,1,128,1,float16,float16,0,0.4907093445460002
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,4,128,1,fp8,fp8,0,1.024949312210083
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,8,128,1,float16,float16,0,1.0834986368815105
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,1,128,1,float16,fp8,0,0.48788801829020184
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,24,128,1,float16,fp8,0,0.5958346525828043
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,8,128,1,fp8,fp8,0,1.0452160040537517
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,24,128,1,fp8,fp8,0,0.5337866544723511
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,24,128,1,float16,float16,0,0.6128000020980835
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,24,8,128,1,float16,fp8,0,1.0775413513183594
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,1,128,1,fp8,fp8,0,0.42512532075246173
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,2,128,1,float16,float16,0,0.5036693414052328
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,2,128,1,float16,fp8,0,0.49802132447560626
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,2,128,1,fp8,fp8,0,0.4702613353729248
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,4,128,1,float16,fp8,0,0.5266933441162109
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,4,128,1,float16,float16,0,0.5386826594670614
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,4,128,1,fp8,fp8,0,0.5154666503270467
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,8,128,1,float16,float16,0,0.5402186711629232
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,8,128,1,float16,fp8,0,0.5435839891433716
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,1,128,1,float16,float16,0,0.25600000222524005
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,24,8,128,1,fp8,fp8,0,0.5286186536153158
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,24,128,1,float16,float16,0,0.3136906623840332
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,24,128,1,float16,fp8,0,0.30557866891225177
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,1,128,1,fp8,fp8,0,0.2202826738357544
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,1,128,1,float16,fp8,0,0.25327465931574505
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,2,128,1,float16,float16,0,0.2609386642773946
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,24,128,1,fp8,fp8,0,0.27663467327753705
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,2,128,1,float16,fp8,0,0.25986133019129437
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,2,128,1,fp8,fp8,0,0.2299306591351827
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,4,128,1,float16,float16,0,0.27249600489934284
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,4,128,1,float16,fp8,0,0.26901866992314655
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,4,128,1,fp8,fp8,0,0.26657066742579144
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,8,128,1,float16,fp8,0,0.271397332350413
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,8,128,1,fp8,fp8,0,0.2723626693089803
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,1,128,1,float16,float16,0,0.13498133420944214
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,24,8,128,1,float16,float16,0,0.2736746668815613
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,1,128,1,float16,fp8,0,0.13477866848309836
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,24,128,1,fp8,fp8,0,0.15035200119018555
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,24,128,1,float16,fp8,0,0.16332266728083292
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,1,128,1,fp8,fp8,0,0.1170240044593811
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,24,128,1,float16,float16,0,0.16589867075284323
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,2,128,1,float16,float16,0,0.13838932911554971
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,2,128,1,float16,fp8,0,0.1378986636797587
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,2,128,1,fp8,fp8,0,0.11984533071517944
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,4,128,1,float16,float16,0,0.14362133542696634
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,4,128,1,float16,fp8,0,0.14248533050219217
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,4,128,1,fp8,fp8,0,0.13781332969665527
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,8,128,1,float16,float16,0,0.14722133676211038
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,8,128,1,fp8,fp8,0,0.1430346667766571
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,24,128,1,float16,float16,0,0.09635200103123982
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,1,128,1,float16,float16,0,0.07730666796366374
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,24,8,128,1,float16,fp8,0,0.1436853309472402
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,24,128,1,fp8,fp8,0,0.08190399905045827
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,1,128,1,float16,fp8,0,0.07783466577529907
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,24,128,1,float16,fp8,0,0.0936959981918335
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,2,128,1,float16,float16,0,0.07913599908351898
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,2,128,1,float16,fp8,0,0.07936533292134602
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,2,128,1,fp8,fp8,0,0.06820266445477803
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,1,128,1,fp8,fp8,0,0.06674133241176605
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,4,128,1,float16,fp8,0,0.08191466828187306
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,4,128,1,float16,float16,0,0.08244266609350841
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,4,128,1,fp8,fp8,0,0.07689600189526875
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,8,128,1,float16,fp8,0,0.08309866487979889
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,8,128,1,fp8,fp8,0,0.07724266747633617
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,24,8,128,1,float16,float16,0,0.08264000217119853
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,24,128,1,float16,float16,0,0.05260799825191498
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,1,128,1,float16,fp8,0,0.04446933170159658
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,24,128,1,float16,fp8,0,0.051632001996040344
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,24,128,1,fp8,fp8,0,0.04834666848182678
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,1,128,1,float16,float16,0,0.044256001710891724
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,1,128,1,fp8,fp8,0,0.038746667404969536
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,2,128,1,float16,float16,0,0.04493333399295807
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,2,128,1,fp8,fp8,0,0.039333333571751915
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,4,128,1,float16,float16,0,0.04654933512210846
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,2,128,1,float16,fp8,0,0.04462933540344238
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,4,128,1,fp8,fp8,0,0.04338666796684265
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,8,128,1,float16,fp8,0,0.04660800099372864
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,4,128,1,float16,fp8,0,0.046480000019073486
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,8,128,1,float16,float16,0,0.047637333472569786
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,24,128,1,float16,float16,0,0.032069332897663116
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,24,8,128,1,fp8,fp8,0,0.04470400015513102
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,24,128,1,float16,fp8,0,0.032416000962257385
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,1,128,1,float16,float16,0,0.03014400104681651
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,1,128,1,float16,fp8,0,0.029898665845394135
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,1,128,1,fp8,fp8,0,0.027210667729377747
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,2,128,1,float16,fp8,0,0.030634666482607525
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,24,128,1,fp8,fp8,0,0.029861333469549816
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,2,128,1,fp8,fp8,0,0.027797333896160126
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,2,128,1,float16,float16,0,0.030415999392668407
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,4,128,1,float16,fp8,0,0.031178665657838184
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,4,128,1,float16,float16,0,0.030991998811562855
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,4,128,1,fp8,fp8,0,0.029301332930723827
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,8,128,1,float16,float16,0,0.031130666534105938
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,8,128,1,float16,fp8,0,0.03161066770553589
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,24,128,1,float16,float16,0,0.023232000569502514
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,24,128,1,fp8,fp8,0,0.0225600004196167
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,1,128,1,float16,float16,0,0.02203733225663503
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,1,128,1,float16,fp8,0,0.022175999979178112
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,24,128,1,float16,fp8,0,0.02316266546646754
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,24,8,128,1,fp8,fp8,0,0.029285334050655365
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,2,128,1,float16,float16,0,0.02213866760333379
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,1,128,1,fp8,fp8,0,0.020960000654061634
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,2,128,1,float16,fp8,0,0.02260799954334895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,2,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,4,128,1,float16,fp8,0,0.023306667804718018
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,4,128,1,float16,float16,0,0.022821334501107533
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,8,128,1,float16,fp8,0,0.022944000860055287
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,4,128,1,fp8,fp8,0,0.0220320001244545
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,8,128,1,fp8,fp8,0,0.022437334060668945
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,24,128,1,float16,float16,0,0.019424000134070713
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,24,128,1,float16,fp8,0,0.019925333559513092
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,24,128,1,fp8,fp8,0,0.01817600056529045
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,24,8,128,1,float16,float16,0,0.02239466706911723
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,1,128,1,float16,float16,0,0.018464000274737675
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,1,128,1,float16,fp8,0,0.018613333503405254
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,2,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,2,128,1,float16,float16,0,0.018677332748969395
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,2,128,1,fp8,fp8,0,0.017935999979575474
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,1,128,1,fp8,fp8,0,0.017583999782800674
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,4,128,1,fp8,fp8,0,0.017866666118303936
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,8,128,1,float16,float16,0,0.01854933301607768
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,8,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,8,128,1,fp8,fp8,0,0.01811733345190684
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,24,4,128,1,float16,fp8,0,0.018895999838908512
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,1,128,1,float16,float16,0,0.4471093416213989
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,1,128,1,fp8,fp8,0,0.42241064707438153
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,2,128,1,float16,float16,0,0.4603039820988973
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,1,128,1,float16,fp8,0,0.4487893184026082
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,2,128,1,float16,fp8,0,0.4550986687342326
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,2,128,1,fp8,fp8,0,0.47147198518117267
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,4,128,1,float16,float16,0,0.4984533389409383
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,4,128,1,float16,fp8,0,0.497104008992513
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,4,128,1,fp8,fp8,0,0.5162239869435629
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,8,128,1,float16,float16,0,0.5009119908014933
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,24,128,1,float16,float16,0,0.30666667222976685
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,1,128,1,float16,float16,0,0.23497066895167032
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,24,128,1,float16,fp8,0,0.2991200089454651
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,8,128,1,float16,fp8,0,0.5094879865646362
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,24,128,1,fp8,fp8,0,0.2732693354288737
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,24,8,128,1,fp8,fp8,0,0.5274879932403564
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,1,128,1,float16,fp8,0,0.2349920074144999
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,1,128,1,fp8,fp8,0,0.21637866894404092
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,2,128,1,float16,float16,0,0.24089600642522177
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,2,128,1,float16,fp8,0,0.23971199989318848
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,2,128,1,fp8,fp8,0,0.23799467086791992
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,4,128,1,float16,float16,0,0.25546133518218994
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,4,128,1,float16,fp8,0,0.25091733535130817
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,4,128,1,fp8,fp8,0,0.26635199785232544
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,8,128,1,float16,float16,0,0.25840532779693604
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,1,128,1,float16,float16,0,0.12718400359153748
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,8,128,1,float16,fp8,0,0.2546079953511556
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,24,8,128,1,fp8,fp8,0,0.27139200766881305
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,24,128,1,float16,fp8,0,0.1618133286635081
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,1,128,1,fp8,fp8,0,0.11641066273053487
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,24,128,1,fp8,fp8,0,0.14492266376813254
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,24,128,1,float16,float16,0,0.16622933745384216
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,1,128,1,float16,fp8,0,0.12879467010498047
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,2,128,1,float16,float16,0,0.1304800013701121
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,2,128,1,float16,fp8,0,0.12963199615478516
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,2,128,1,fp8,fp8,0,0.11949333548545837
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,4,128,1,float16,float16,0,0.13693867127100626
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,4,128,1,float16,fp8,0,0.13526399930318198
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,4,128,1,fp8,fp8,0,0.13570132851600647
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,8,128,1,float16,float16,0,0.13898133238156637
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,24,128,1,float16,float16,0,0.09172800183296204
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,8,128,1,fp8,fp8,0,0.1400373379389445
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,24,8,128,1,float16,fp8,0,0.13838932911554971
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,24,128,1,float16,fp8,0,0.08813866972923279
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,1,128,1,float16,float16,0,0.07256533205509186
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,1,128,1,float16,fp8,0,0.0728959987560908
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,24,128,1,fp8,fp8,0,0.08118399977684021
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,2,128,1,float16,float16,0,0.07361599802970886
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,1,128,1,fp8,fp8,0,0.06576533118883769
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,2,128,1,float16,fp8,0,0.0738560010989507
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,4,128,1,float16,float16,0,0.07720533510049184
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,4,128,1,fp8,fp8,0,0.07579733431339264
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,4,128,1,float16,fp8,0,0.07654933134714763
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,8,128,1,float16,float16,0,0.07825066645940144
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,2,128,1,fp8,fp8,0,0.06834666430950165
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,8,128,1,float16,fp8,0,0.07818666597207387
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,24,128,1,float16,float16,0,0.050698667764663696
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,24,8,128,1,fp8,fp8,0,0.07593066493670146
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,24,128,1,float16,fp8,0,0.049173335234324135
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,1,128,1,float16,float16,0,0.04146133363246918
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,1,128,1,float16,fp8,0,0.04153066625197729
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,24,128,1,fp8,fp8,0,0.047450666626294456
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,2,128,1,float16,float16,0,0.042064001162846885
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,1,128,1,fp8,fp8,0,0.038245332737763725
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,2,128,1,float16,fp8,0,0.042080000042915344
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,4,128,1,float16,float16,0,0.044266665975252785
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,2,128,1,fp8,fp8,0,0.03893866638342539
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,4,128,1,float16,fp8,0,0.04359999795754751
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,4,128,1,fp8,fp8,0,0.042917331059773765
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,8,128,1,float16,float16,0,0.04459733267625173
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,8,128,1,float16,fp8,0,0.04491733511288961
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,24,128,1,float16,float16,0,0.03164266546567281
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,1,128,1,float16,float16,0,0.02889599899450938
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,24,128,1,float16,fp8,0,0.031770666440327965
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,24,8,128,1,fp8,fp8,0,0.04436799883842468
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,1,128,1,float16,fp8,0,0.02865600089232127
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,1,128,1,fp8,fp8,0,0.027376001079877216
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,24,128,1,fp8,fp8,0,0.029391999046007793
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,2,128,1,float16,float16,0,0.02938666691382726
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,2,128,1,float16,fp8,0,0.029845332105954487
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,2,128,1,fp8,fp8,0,0.02769600103298823
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,4,128,1,float16,float16,0,0.029951999584833782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,4,128,1,fp8,fp8,0,0.029103999336560566
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,8,128,1,float16,float16,0,0.030224000414212544
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,8,128,1,float16,fp8,0,0.030975999931494396
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,4,128,1,float16,fp8,0,0.03070399910211563
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,24,8,128,1,fp8,fp8,0,0.02926933268706004
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,24,128,1,float16,fp8,0,0.022821334501107533
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,24,128,1,fp8,fp8,0,0.022437334060668945
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,24,128,1,float16,float16,0,0.022463999688625336
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,1,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,1,128,1,float16,float16,0,0.02091199904680252
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,2,128,1,float16,fp8,0,0.021344001094500225
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,2,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,1,128,1,fp8,fp8,0,0.02053333322207133
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,4,128,1,fp8,fp8,0,0.022266666094462078
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,4,128,1,float16,float16,0,0.02180800090233485
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,8,128,1,float16,float16,0,0.021744000415007275
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,4,128,1,float16,fp8,0,0.021914665897687275
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,2,128,1,float16,float16,0,0.02089066555102666
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,8,128,1,fp8,fp8,0,0.022309333086013794
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,24,8,128,1,float16,fp8,0,0.02203733225663503
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,24,128,1,float16,fp8,0,0.01785600061217944
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,24,128,1,fp8,fp8,0,0.018197332819302876
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,1,128,1,float16,float16,0,0.017290666699409485
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,1,128,1,float16,fp8,0,0.017616000026464462
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,2,128,1,float16,float16,0,0.01736533393462499
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,2,128,1,float16,fp8,0,0.01798933371901512
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,2,128,1,fp8,fp8,0,0.017680000513792038
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,24,128,1,float16,float16,0,0.017530667285124462
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,1,128,1,fp8,fp8,0,0.017562666287024815
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,4,128,1,float16,fp8,0,0.01786133274435997
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,4,128,1,fp8,fp8,0,0.01781333362062772
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,4,128,1,float16,float16,0,0.017312000195185345
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,8,128,1,float16,float16,0,0.017653333644072216
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,8,128,1,fp8,fp8,0,0.01803733284274737
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,24,8,128,1,float16,fp8,0,0.017952000101407368
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,24,128,1,float16,float16,0,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,24,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,24,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,1,128,1,float16,float16,0,0.016837333639462788
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,1,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,1,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,2,128,1,float16,float16,0,0.016565332810084026
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,2,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,2,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,4,128,1,float16,float16,0,0.01700266698996226
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,4,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,8,128,1,float16,float16,0,0.016688000410795212
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,4,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,8,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,24,8,128,1,float16,fp8,0,0.01749333366751671
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,1,128,1,float16,float16,0,0.28337599833806354
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,1,128,1,float16,fp8,0,0.2844640016555786
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,1,128,1,fp8,fp8,0,0.29292800029118854
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,2,128,1,float16,fp8,0,0.28599466880162555
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,4,128,1,float16,float16,0,0.30157333612442017
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,2,128,1,float16,float16,0,0.28732800483703613
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,4,128,1,float16,fp8,0,0.3006239930788676
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,2,128,1,fp8,fp8,0,0.3041119972864787
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,4,128,1,fp8,fp8,0,0.34494932492574054
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,1,128,1,float16,float16,0,0.14987199505170187
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,8,128,1,float16,fp8,0,0.3011893431345622
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,8,128,1,float16,float16,0,0.3001226584116618
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,24,128,1,float16,float16,0,0.17293866475423178
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,24,8,128,1,fp8,fp8,0,0.3466506799062093
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,24,128,1,fp8,fp8,0,0.18285866578420004
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,1,128,1,float16,fp8,0,0.15004266301790872
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,1,128,1,fp8,fp8,0,0.15652267138163248
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,24,128,1,float16,fp8,0,0.1673706571261088
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,2,128,1,float16,float16,0,0.1511360009511312
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,2,128,1,fp8,fp8,0,0.1606666644414266
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,2,128,1,float16,fp8,0,0.15095466375350952
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,4,128,1,float16,float16,0,0.1577173372109731
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,4,128,1,float16,fp8,0,0.15809067090352377
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,4,128,1,fp8,fp8,0,0.17593065897623697
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,8,128,1,float16,float16,0,0.16025599837303162
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,8,128,1,float16,fp8,0,0.15945600469907126
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,1,128,1,float16,fp8,0,0.08343999584515889
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,24,128,1,float16,float16,0,0.09302399555842082
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,24,8,128,1,fp8,fp8,0,0.1802133321762085
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,1,128,1,float16,float16,0,0.08252266546090443
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,24,128,1,float16,fp8,0,0.09050666292508443
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,24,128,1,fp8,fp8,0,0.10120532910029094
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,1,128,1,fp8,fp8,0,0.08715200424194336
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,2,128,1,float16,fp8,0,0.08475733796755473
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,2,128,1,fp8,fp8,0,0.08970133463541667
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,4,128,1,float16,fp8,0,0.08705600102742513
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,4,128,1,float16,float16,0,0.08691733082135518
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,2,128,1,float16,float16,0,0.08413333694140117
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,4,128,1,fp8,fp8,0,0.09621333082516988
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,8,128,1,float16,float16,0,0.0876533289750417
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,8,128,1,float16,fp8,0,0.08767466743787129
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,24,128,1,float16,float16,0,0.052970667680104576
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,24,8,128,1,fp8,fp8,0,0.09683733185132344
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,24,128,1,float16,fp8,0,0.05089066425959269
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,1,128,1,fp8,fp8,0,0.04890666902065277
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,2,128,1,float16,float16,0,0.04713066418965658
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,1,128,1,float16,float16,0,0.04651733239491781
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,24,128,1,fp8,fp8,0,0.05763733386993408
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,1,128,1,float16,fp8,0,0.04689066608746847
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,2,128,1,fp8,fp8,0,0.050186668833096824
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,2,128,1,float16,fp8,0,0.0479360024134318
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,4,128,1,float16,float16,0,0.04933333396911621
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,4,128,1,float16,fp8,0,0.04929600159327189
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,4,128,1,fp8,fp8,0,0.05379199981689453
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,8,128,1,float16,fp8,0,0.04959466556708018
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,8,128,1,float16,float16,0,0.04997866849104563
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,24,8,128,1,fp8,fp8,0,0.05449600021044413
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,24,128,1,float16,float16,0,0.03012266755104065
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,24,128,1,float16,fp8,0,0.029861333469549816
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,24,128,1,fp8,fp8,0,0.03404266635576884
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,1,128,1,float16,fp8,0,0.02951466788848241
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,1,128,1,fp8,fp8,0,0.030720000465710957
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,1,128,1,float16,float16,0,0.02918400118748347
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,2,128,1,float16,float16,0,0.029391999046007793
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,2,128,1,float16,fp8,0,0.0296426663796107
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,2,128,1,fp8,fp8,0,0.03133333226044973
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,4,128,1,float16,float16,0,0.0305173322558403
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,4,128,1,float16,fp8,0,0.03046400099992752
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,4,128,1,fp8,fp8,0,0.03299733251333237
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,8,128,1,float16,float16,0,0.030453334252039593
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,8,128,1,float16,fp8,0,0.03062933435042699
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,24,8,128,1,fp8,fp8,0,0.03278400003910065
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,24,128,1,float16,float16,0,0.02387733260790507
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,24,128,1,float16,fp8,0,0.023930666347344715
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,24,128,1,fp8,fp8,0,0.02571200082699458
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,1,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,1,128,1,fp8,fp8,0,0.02385599911212921
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,1,128,1,float16,float16,0,0.022853332261244457
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,2,128,1,float16,float16,0,0.0233599990606308
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,2,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,4,128,1,float16,float16,0,0.023567999402681988
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,4,128,1,float16,fp8,0,0.02418133368094762
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,8,128,1,float16,float16,0,0.023797333240509033
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,4,128,1,fp8,fp8,0,0.025621332228183746
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,8,128,1,fp8,fp8,0,0.02589866767326991
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,8,128,1,float16,fp8,0,0.024186665813128155
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,24,2,128,1,fp8,fp8,0,0.024085332949956257
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,24,128,1,float16,float16,0,0.017071999609470367
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,24,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,1,128,1,float16,float16,0,0.01637866720557213
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,24,128,1,fp8,fp8,0,0.018250666558742523
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,1,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,1,128,1,fp8,fp8,0,0.01752000053723653
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,2,128,1,float16,float16,0,0.01626666635274887
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,2,128,1,fp8,fp8,0,0.017637333522240322
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,2,128,1,float16,fp8,0,0.016565332810084026
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,4,128,1,float16,float16,0,0.01640533283352852
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,4,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,4,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,8,128,1,float16,float16,0,0.016410666207472484
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,8,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,24,8,128,1,fp8,fp8,0,0.01807466646035512
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,24,128,1,float16,float16,0,0.015498666713635126
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,24,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,24,128,1,float16,fp8,0,0.015791999797026317
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,1,128,1,float16,float16,0,0.01545599972208341
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,1,128,1,fp8,fp8,0,0.016688000410795212
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,1,128,1,float16,fp8,0,0.01598400001724561
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,2,128,1,float16,float16,0,0.015482666591803232
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,2,128,1,float16,fp8,0,0.01602666700879733
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,4,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,4,128,1,float16,float16,0,0.015610666324694952
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,8,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,8,128,1,float16,float16,0,0.015664000064134598
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,8,128,1,float16,fp8,0,0.016208000481128693
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,4,128,1,float16,fp8,0,0.015930666277805965
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,24,128,1,float16,float16,0,0.014858666807413101
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,24,2,128,1,fp8,fp8,0,0.01657066618402799
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,24,128,1,float16,fp8,0,0.015360000232855478
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,24,128,1,fp8,fp8,0,0.016122666498025257
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,1,128,1,float16,float16,0,0.015114666273196539
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,1,128,1,float16,fp8,0,0.015642666568358738
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,2,128,1,float16,float16,0,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,2,128,1,float16,fp8,0,0.015850666910409927
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,1,128,1,fp8,fp8,0,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,2,128,1,fp8,fp8,0,0.016117333124081295
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,4,128,1,float16,fp8,0,0.01570133368174235
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,4,128,1,fp8,fp8,0,0.01634666696190834
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,8,128,1,fp8,fp8,0,0.01646399994691213
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,8,128,1,float16,fp8,0,0.015552000453074774
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,4,128,1,float16,float16,0,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,24,8,128,1,float16,float16,0,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,1,128,1,float16,float16,0,0.20061333974202475
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,1,128,1,float16,fp8,0,0.19978666305541992
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,1,128,1,fp8,fp8,0,0.23770666122436523
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,2,128,1,float16,float16,0,0.2009333372116089
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,2,128,1,float16,fp8,0,0.2007253368695577
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,2,128,1,fp8,fp8,0,0.23902400334676108
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,4,128,1,float16,float16,0,0.20781334241231283
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,4,128,1,float16,fp8,0,0.20805333058039346
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,4,128,1,fp8,fp8,0,0.25805334250132245
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,8,128,1,float16,float16,0,0.2104319930076599
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,24,128,1,float16,float16,0,0.11639466881752014
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,1,128,1,float16,float16,0,0.1051680048306783
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,8,128,1,float16,fp8,0,0.2106293241182963
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,24,128,1,float16,fp8,0,0.11450133721033733
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,24,128,1,fp8,fp8,0,0.14178666472434998
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,1,128,1,float16,fp8,0,0.1060746709505717
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,24,8,128,1,fp8,fp8,0,0.2595893343289693
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,1,128,1,fp8,fp8,0,0.12826133767763773
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,2,128,1,float16,float16,0,0.10637332995732625
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,2,128,1,float16,fp8,0,0.10718400279680888
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,2,128,1,fp8,fp8,0,0.13008000453313193
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,4,128,1,float16,fp8,0,0.1104800005753835
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,4,128,1,fp8,fp8,0,0.13924800356229147
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,4,128,1,float16,float16,0,0.10997866590817769
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,8,128,1,float16,float16,0,0.1126026709874471
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,8,128,1,float16,fp8,0,0.1123466690381368
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,24,8,128,1,fp8,fp8,0,0.13874133427937826
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,24,128,1,float16,fp8,0,0.0626933326323827
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,1,128,1,float16,float16,0,0.057946667075157166
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,24,128,1,float16,float16,0,0.06308266520500183
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,24,128,1,fp8,fp8,0,0.07912000020345052
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,1,128,1,fp8,fp8,0,0.07017600039641063
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,2,128,1,fp8,fp8,0,0.0710346649090449
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,1,128,1,float16,fp8,0,0.058389330903689064
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,2,128,1,float16,fp8,0,0.05938133100668589
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,4,128,1,fp8,fp8,0,0.0747680018345515
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,4,128,1,float16,fp8,0,0.060218666990598045
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,4,128,1,float16,float16,0,0.060778667529424034
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,2,128,1,float16,float16,0,0.05898666878541311
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,8,128,1,float16,float16,0,0.06083733340104421
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,8,128,1,float16,fp8,0,0.06154133379459381
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,24,128,1,float16,float16,0,0.03495466709136963
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,1,128,1,float16,float16,0,0.03479466587305069
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,24,128,1,fp8,fp8,0,0.043365334471066795
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,24,128,1,float16,fp8,0,0.03513599932193756
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,1,128,1,float16,fp8,0,0.03505066782236099
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,24,8,128,1,fp8,fp8,0,0.07491733133792877
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,1,128,1,fp8,fp8,0,0.040991999208927155
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,2,128,1,float16,float16,0,0.0353973334034284
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,4,128,1,float16,float16,0,0.035631999373435974
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,2,128,1,float16,fp8,0,0.03526933242877325
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,2,128,1,fp8,fp8,0,0.041482667128245033
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,4,128,1,float16,fp8,0,0.03568533311287562
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,4,128,1,fp8,fp8,0,0.0433599998553594
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,8,128,1,float16,fp8,0,0.03604800005753835
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,8,128,1,fp8,fp8,0,0.04348266621430715
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,24,8,128,1,float16,float16,0,0.035642666121323906
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,24,128,1,float16,float16,0,0.02470933397610982
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,24,128,1,fp8,fp8,0,0.02962133288383484
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,24,128,1,float16,fp8,0,0.024853333830833435
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,1,128,1,float16,float16,0,0.023786666492621105
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,1,128,1,float16,fp8,0,0.02369600037733714
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,1,128,1,fp8,fp8,0,0.0277813325325648
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,2,128,1,float16,float16,0,0.024112001061439514
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,2,128,1,fp8,fp8,0,0.028117333849271137
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,4,128,1,float16,float16,0,0.024826665719350178
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,4,128,1,fp8,fp8,0,0.02934933453798294
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,4,128,1,float16,fp8,0,0.024735999604066212
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,8,128,1,float16,float16,0,0.024858665963013966
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,8,128,1,float16,fp8,0,0.024735999604066212
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,8,128,1,fp8,fp8,0,0.029535998900731403
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,24,2,128,1,float16,fp8,0,0.023845332364241283
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,24,128,1,float16,float16,0,0.019018666197856266
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,24,128,1,float16,fp8,0,0.01887999971707662
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,24,128,1,fp8,fp8,0,0.021568000316619873
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,1,128,1,float16,float16,0,0.018325333793958027
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,2,128,1,float16,float16,0,0.018432000031073887
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,1,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,1,128,1,float16,fp8,0,0.018458666900793713
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,2,128,1,fp8,fp8,0,0.02090666691462199
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,4,128,1,float16,fp8,0,0.018506667266289394
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,4,128,1,float16,float16,0,0.018565333137909572
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,4,128,1,fp8,fp8,0,0.021018666525681812
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,8,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,8,128,1,float16,float16,0,0.019066666563351948
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,2,128,1,float16,fp8,0,0.01850133389234543
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,24,8,128,1,fp8,fp8,0,0.02144533395767212
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,24,128,1,float16,float16,0,0.01522133375207583
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,24,128,1,float16,fp8,0,0.015594666202863058
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,24,128,1,fp8,fp8,0,0.017583999782800674
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,1,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,1,128,1,float16,float16,0,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,1,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,2,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,2,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,2,128,1,float16,float16,0,0.014789332946141561
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,4,128,1,float16,float16,0,0.015173333386580149
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,4,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,4,128,1,float16,fp8,0,0.015530666957298914
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,8,128,1,float16,float16,0,0.015429332852363586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,8,128,1,float16,fp8,0,0.015754666179418564
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,24,8,128,1,fp8,fp8,0,0.017717332889636356
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,24,128,1,float16,float16,0,0.014309333016475042
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,24,128,1,fp8,fp8,0,0.016586666305859882
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,1,128,1,float16,float16,0,0.01434133326013883
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,1,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,1,128,1,fp8,fp8,0,0.01646399994691213
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,2,128,1,float16,float16,0,0.014389333625634512
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,24,128,1,float16,fp8,0,0.014581333845853806
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,2,128,1,float16,fp8,0,0.014874666929244995
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,2,128,1,fp8,fp8,0,0.016303999970356624
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,4,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,4,128,1,float16,float16,0,0.014405333747466406
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,4,128,1,fp8,fp8,0,0.016629333297411602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,8,128,1,float16,float16,0,0.01441066712141037
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,8,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,24,128,1,float16,fp8,0,0.014250667144854864
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,24,128,1,float16,float16,0,0.013818666338920593
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,24,8,128,1,fp8,fp8,0,0.01643199970324834
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,24,128,1,fp8,fp8,0,0.015893333901961643
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,1,128,1,float16,float16,0,0.013967999567588171
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,1,128,1,float16,fp8,0,0.014682666709025701
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,1,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,2,128,1,float16,float16,0,0.014085333794355392
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,2,128,1,float16,fp8,0,0.014661333213249842
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,2,128,1,fp8,fp8,0,0.015935999651749928
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,4,128,1,float16,fp8,0,0.014783999572197596
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,4,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,8,128,1,float16,float16,0,0.014069333672523499
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,8,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,8,128,1,fp8,fp8,0,0.016117333124081295
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,24,4,128,1,float16,float16,0,0.013914667069911957
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,1,128,1,float16,float16,0,0.170522669951121
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,1,128,1,float16,fp8,0,0.17088532447814941
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,1,128,1,fp8,fp8,0,0.2043466567993164
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,2,128,1,float16,float16,0,0.17152533928553262
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,2,128,1,float16,fp8,0,0.17213332653045654
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,2,128,1,fp8,fp8,0,0.20719999074935913
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,4,128,1,float16,fp8,0,0.17454399665196738
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,4,128,1,float16,float16,0,0.17470399538675943
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,4,128,1,fp8,fp8,0,0.2146773338317871
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,8,128,1,float16,fp8,0,0.17512534062067667
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,8,128,1,float16,float16,0,0.1760746637980143
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,24,128,1,float16,float16,0,0.08901866277058919
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,24,128,1,float16,fp8,0,0.08769599596659343
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,24,8,128,1,fp8,fp8,0,0.21530133485794067
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,1,128,1,float16,float16,0,0.09057600299517314
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,1,128,1,float16,fp8,0,0.09086400270462036
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,1,128,1,fp8,fp8,0,0.10918399691581726
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,24,128,1,fp8,fp8,0,0.11785067121187846
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,2,128,1,float16,fp8,0,0.09106133381525676
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,4,128,1,float16,float16,0,0.09258133172988892
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,2,128,1,float16,float16,0,0.09151466687520345
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,2,128,1,fp8,fp8,0,0.11043199896812439
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,4,128,1,fp8,fp8,0,0.11372266213099162
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,8,128,1,float16,float16,0,0.09405333797136943
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,4,128,1,float16,fp8,0,0.09336533149083455
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,8,128,1,fp8,fp8,0,0.11617066462834676
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,24,8,128,1,float16,fp8,0,0.09232532978057861
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,24,128,1,fp8,fp8,0,0.06366399923960368
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,24,128,1,float16,float16,0,0.04752000172932943
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,1,128,1,float16,float16,0,0.051738664507865906
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,1,128,1,float16,fp8,0,0.05213333169619242
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,2,128,1,float16,float16,0,0.052186667919158936
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,24,128,1,float16,fp8,0,0.04770133395989736
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,2,128,1,fp8,fp8,0,0.0625493327776591
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,4,128,1,float16,float16,0,0.05264533559481303
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,2,128,1,float16,fp8,0,0.05206400156021118
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,4,128,1,float16,fp8,0,0.05300266544024149
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,1,128,1,fp8,fp8,0,0.062261333068211876
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,4,128,1,fp8,fp8,0,0.06449600060780843
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,8,128,1,float16,float16,0,0.05328533550103506
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,8,128,1,fp8,fp8,0,0.06469333171844482
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,24,8,128,1,float16,fp8,0,0.05362666646639506
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,24,128,1,float16,float16,0,0.031221332649389904
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,24,128,1,fp8,fp8,0,0.039317332208156586
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,1,128,1,float16,fp8,0,0.03270933280388514
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,1,128,1,fp8,fp8,0,0.038533332447210945
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,24,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,1,128,1,float16,float16,0,0.032416000962257385
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,2,128,1,float16,float16,0,0.032773333291212715
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,2,128,1,fp8,fp8,0,0.03846399982770284
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,4,128,1,float16,float16,0,0.03323733309904734
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,4,128,1,float16,fp8,0,0.0335413341720899
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,4,128,1,fp8,fp8,0,0.040218666195869446
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,2,128,1,float16,fp8,0,0.03284800052642822
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,8,128,1,fp8,fp8,0,0.039919999738534294
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,24,8,128,1,float16,fp8,0,0.03348266581694285
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,24,128,1,float16,float16,0,0.021045332153638203
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,24,128,1,fp8,fp8,0,0.02515200028816859
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,1,128,1,float16,float16,0,0.021221332252025604
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,2,128,1,float16,float16,0,0.02117866774400075
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,2,128,1,fp8,fp8,0,0.02478400121132533
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,24,128,1,float16,fp8,0,0.02160533269246419
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,4,128,1,float16,float16,0,0.021253332495689392
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,1,128,1,fp8,fp8,0,0.02475733309984207
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,2,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,1,128,1,float16,fp8,0,0.021104000508785248
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,4,128,1,float16,fp8,0,0.021482666333516438
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,4,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,8,128,1,float16,float16,0,0.021018666525681812
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,8,128,1,fp8,fp8,0,0.02518933266401291
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,24,8,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,24,128,1,float16,fp8,0,0.01825599993268649
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,24,128,1,float16,float16,0,0.017397332936525345
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,1,128,1,float16,float16,0,0.01749333366751671
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,1,128,1,float16,fp8,0,0.017701332767804463
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,1,128,1,fp8,fp8,0,0.0206986665725708
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,24,128,1,fp8,fp8,0,0.021002667645613354
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,2,128,1,float16,float16,0,0.017557332913080852
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,2,128,1,float16,fp8,0,0.01807466646035512
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,2,128,1,fp8,fp8,0,0.02086399992307027
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,4,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,4,128,1,float16,fp8,0,0.018437333405017853
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,4,128,1,float16,float16,0,0.01756799966096878
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,8,128,1,float16,float16,0,0.017871999492247898
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,8,128,1,fp8,fp8,0,0.0215786670645078
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,24,8,128,1,float16,fp8,0,0.018207999567190807
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,1,128,1,fp8,fp8,0,0.016693333784739178
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,24,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,2,128,1,float16,float16,0,0.014287999520699183
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,24,128,1,fp8,fp8,0,0.0174346665541331
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,1,128,1,float16,float16,0,0.014352000008026758
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,24,128,1,float16,float16,0,0.014682666709025701
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,1,128,1,float16,fp8,0,0.014826666563749313
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,2,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,4,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,8,128,1,float16,float16,0,0.014490666488806406
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,2,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,4,128,1,fp8,fp8,0,0.017375999440749485
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,4,128,1,float16,float16,0,0.014757333944241205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,24,128,1,float16,float16,0,0.013951999445756277
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,24,128,1,float16,fp8,0,0.014581333845853806
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,8,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,24,8,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,24,128,1,fp8,fp8,0,0.016309333344300587
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,1,128,1,float16,fp8,0,0.014757333944241205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,1,128,1,fp8,fp8,0,0.01632533346613248
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,2,128,1,float16,fp8,0,0.014794666320085526
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,2,128,1,float16,float16,0,0.014064000298579534
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,1,128,1,float16,float16,0,0.013999999811251959
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,4,128,1,float16,fp8,0,0.014650666465361914
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,4,128,1,fp8,fp8,0,0.01640533283352852
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,2,128,1,fp8,fp8,0,0.016271999726692837
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,8,128,1,float16,fp8,0,0.014789332946141561
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,8,128,1,fp8,fp8,0,0.01624533285697301
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,8,128,1,float16,float16,0,0.014064000298579534
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,24,4,128,1,float16,float16,0,0.014720000326633453
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,24,128,1,float16,fp8,0,0.014266667266686758
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,24,128,1,fp8,fp8,0,0.015781333049138386
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,1,128,1,float16,fp8,0,0.014618666221698126
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,1,128,1,fp8,fp8,0,0.016010666886965435
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,24,128,1,float16,float16,0,0.013616000612576803
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,2,128,1,float16,float16,0,0.013866666704416275
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,1,128,1,float16,float16,0,0.013786666095256805
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,2,128,1,fp8,fp8,0,0.015925332903862
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,2,128,1,float16,fp8,0,0.014511999984582266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,4,128,1,float16,float16,0,0.013904000322024027
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,4,128,1,float16,fp8,0,0.014597332725922266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,4,128,1,fp8,fp8,0,0.015962666521469753
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,8,128,1,float16,fp8,0,0.01461333284775416
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,8,128,1,float16,float16,0,0.013807999591032663
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,24,8,128,1,fp8,fp8,0,0.01588800052801768
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,1,128,1,fp8,fp8,0,10.74020767211914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,1,128,1,float16,fp8,0,16.191162109375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,1,128,1,float16,float16,0,16.423418680826824
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,2,128,1,float16,float16,0,16.654805501302082
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,2,128,1,float16,fp8,0,16.530228932698567
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,2,128,1,fp8,fp8,0,10.739781697591146
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,4,128,1,float16,fp8,0,16.95789337158203
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,4,128,1,float16,float16,0,16.6516850789388
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,1,128,1,float16,float16,0,8.307200113932291
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,4,128,1,fp8,fp8,0,10.858683268229166
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,16,128,1,float16,float16,0,8.162101109822592
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,16,128,1,float16,fp8,0,8.125658671061197
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,8,128,1,float16,fp8,0,16.51577631632487
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,16,128,1,fp8,fp8,0,5.080565452575684
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,8,128,1,fp8,fp8,0,11.061295827229818
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,1,128,1,float16,fp8,0,8.28713607788086
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,1,128,1,fp8,fp8,0,5.299962679545085
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,2,128,1,float16,float16,0,8.393573125203451
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,16,8,128,1,float16,float16,0,16.38202667236328
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,2,128,1,float16,fp8,0,8.240666707356771
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,2,128,1,fp8,fp8,0,5.110879898071289
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,4,128,1,float16,fp8,0,8.192911783854166
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,4,128,1,float16,float16,0,8.238608042399088
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,4,128,1,fp8,fp8,0,5.354239781697591
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,8,128,1,float16,float16,0,8.446538925170898
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,8,128,1,float16,fp8,0,8.340703964233398
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,1,128,1,float16,float16,0,4.056266784667969
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,1,128,1,float16,fp8,0,4.157610575358073
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,16,128,1,float16,fp8,0,3.858384132385254
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,1,128,1,fp8,fp8,0,2.639359951019287
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,16,128,1,float16,float16,0,4.001392046610515
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,16,8,128,1,fp8,fp8,0,5.296629269917806
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,16,128,1,fp8,fp8,0,2.6780694325764975
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,2,128,1,float16,float16,0,3.985439936319987
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,2,128,1,float16,fp8,0,3.9385226567586265
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,2,128,1,fp8,fp8,0,2.6702025731404624
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,4,128,1,float16,float16,0,4.017626762390137
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,4,128,1,float16,fp8,0,3.928325335184733
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,4,128,1,fp8,fp8,0,2.657968044281006
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,8,128,1,float16,float16,0,4.006346702575684
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,8,128,1,float16,fp8,0,4.1403201421101885
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,16,128,1,float16,float16,0,2.050645351409912
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,16,8,128,1,fp8,fp8,0,2.704287846883138
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,1,128,1,float16,float16,0,2.0932532946268716
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,1,128,1,float16,fp8,0,2.0734559694925943
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,16,128,1,float16,fp8,0,2.059706687927246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,16,128,1,fp8,fp8,0,1.40556796391805
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,1,128,1,fp8,fp8,0,1.4271574020385742
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,2,128,1,fp8,fp8,0,1.4328160285949707
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,2,128,1,float16,float16,0,2.1117547353108725
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,2,128,1,float16,fp8,0,2.086362679799398
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,4,128,1,fp8,fp8,0,1.4414879480997722
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,4,128,1,float16,fp8,0,2.0826026598612466
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,4,128,1,float16,float16,0,2.099658648173014
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,8,128,1,float16,float16,0,2.110485394795736
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,8,128,1,float16,fp8,0,2.0780800183614097
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,16,8,128,1,fp8,fp8,0,1.44870392481486
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,1,128,1,fp8,fp8,0,5.986207962036133
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,1,128,1,float16,float16,0,9.617082595825195
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,2,128,1,float16,float16,0,9.565546671549479
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,1,128,1,float16,fp8,0,9.649210611979166
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,2,128,1,float16,fp8,0,9.542943954467773
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,2,128,1,fp8,fp8,0,6.088549296061198
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,4,128,1,float16,fp8,0,9.497845331827799
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,4,128,1,float16,float16,0,9.51027743021647
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,1,128,1,float16,float16,0,4.619333267211914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,4,128,1,fp8,fp8,0,6.293338775634766
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,16,128,1,float16,float16,0,4.597418785095215
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,16,128,1,float16,fp8,0,4.457653363545735
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,8,128,1,float16,float16,0,9.820880254109701
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,8,128,1,float16,fp8,0,9.57315190633138
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,16,128,1,fp8,fp8,0,3.0600481033325195
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,16,8,128,1,fp8,fp8,0,6.366144180297852
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,1,128,1,float16,fp8,0,4.626325289408366
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,1,128,1,fp8,fp8,0,3.094688097635905
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,2,128,1,float16,float16,0,4.744256019592285
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,2,128,1,float16,fp8,0,4.6204268137613935
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,2,128,1,fp8,fp8,0,3.086655934651693
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,4,128,1,fp8,fp8,0,3.0548267364501953
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,4,128,1,float16,fp8,0,4.540250778198242
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,4,128,1,float16,float16,0,4.555472056070964
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,8,128,1,float16,float16,0,4.658890724182129
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,16,128,1,float16,float16,0,2.301018714904785
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,8,128,1,float16,fp8,0,4.708474795023601
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,1,128,1,float16,float16,0,2.3482933044433594
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,1,128,1,float16,fp8,0,2.3091893196105957
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,16,8,128,1,fp8,fp8,0,3.145104090372721
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,16,128,1,float16,fp8,0,2.3080639839172363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,1,128,1,fp8,fp8,0,1.5868959426879883
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,2,128,1,float16,float16,0,2.341584046681722
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,16,128,1,fp8,fp8,0,1.5900479952494304
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,2,128,1,float16,fp8,0,2.339461326599121
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,2,128,1,fp8,fp8,0,1.596560001373291
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,4,128,1,float16,float16,0,2.3378186225891113
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,4,128,1,float16,fp8,0,2.321440060933431
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,4,128,1,fp8,fp8,0,1.613573392232259
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,8,128,1,float16,float16,0,2.334709326426188
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,16,128,1,float16,float16,0,1.2483733495076497
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,1,128,1,float16,float16,0,1.26146133740743
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,16,128,1,float16,fp8,0,1.2404533227284749
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,8,128,1,float16,fp8,0,2.3461173375447593
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,1,128,1,float16,fp8,0,1.2595787048339844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,16,8,128,1,fp8,fp8,0,1.6179787317911785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,1,128,1,fp8,fp8,0,0.8260746796925863
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,16,128,1,fp8,fp8,0,0.8252960046132406
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,2,128,1,float16,float16,0,1.2732906341552734
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,2,128,1,float16,fp8,0,1.2526453336079915
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,2,128,1,fp8,fp8,0,0.8301119804382324
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,4,128,1,float16,float16,0,1.270154635111491
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,4,128,1,float16,fp8,0,1.2655359903971355
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,4,128,1,fp8,fp8,0,0.831397294998169
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,8,128,1,float16,float16,0,1.2637759844462078
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,8,128,1,float16,fp8,0,1.2607680161794026
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,16,8,128,1,fp8,fp8,0,0.851909319559733
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,1,128,1,float16,fp8,0,6.754005432128906
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,1,128,1,float16,float16,0,6.632997512817383
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,1,128,1,fp8,fp8,0,4.260586738586426
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,2,128,1,float16,float16,0,6.664181391398112
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,2,128,1,float16,fp8,0,6.758186976114909
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,2,128,1,fp8,fp8,0,4.260832150777181
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,4,128,1,float16,float16,0,6.609216054280599
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,4,128,1,float16,fp8,0,6.714405059814453
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,4,128,1,fp8,fp8,0,4.470773379007976
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,1,128,1,float16,float16,0,3.2527198791503906
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,16,128,1,float16,float16,0,3.181141217549642
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,16,128,1,float16,fp8,0,3.222208023071289
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,8,128,1,float16,float16,0,6.867082595825195
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,16,128,1,fp8,fp8,0,2.213616053263346
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,1,128,1,float16,fp8,0,3.3446613947550454
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,8,128,1,fp8,fp8,0,4.423173268636067
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,16,8,128,1,float16,fp8,0,6.711221059163411
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,1,128,1,fp8,fp8,0,2.2003626823425293
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,2,128,1,float16,float16,0,3.289072036743164
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,2,128,1,float16,fp8,0,3.2077067693074546
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,2,128,1,fp8,fp8,0,2.1960585912068686
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,4,128,1,float16,float16,0,3.217514673868815
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,4,128,1,fp8,fp8,0,2.219754695892334
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,4,128,1,float16,fp8,0,3.2398452758789062
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,8,128,1,float16,float16,0,3.2524585723876953
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,8,128,1,float16,fp8,0,3.3941920598347983
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,16,128,1,float16,fp8,0,1.6459466616312664
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,16,128,1,float16,float16,0,1.645802656809489
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,1,128,1,float16,fp8,0,1.6750186284383137
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,1,128,1,float16,float16,0,1.6921067237854004
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,16,8,128,1,fp8,fp8,0,2.24454402923584
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,1,128,1,fp8,fp8,0,1.1638399759928386
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,2,128,1,float16,float16,0,1.7016000747680664
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,16,128,1,fp8,fp8,0,1.1656160354614258
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,2,128,1,float16,fp8,0,1.6671360333760579
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,2,128,1,fp8,fp8,0,1.1638986269632976
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,4,128,1,float16,fp8,0,1.6874826749165852
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,4,128,1,float16,float16,0,1.6991732915242512
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,4,128,1,fp8,fp8,0,1.1697759628295898
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,8,128,1,float16,float16,0,1.7039039929707844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,8,128,1,float16,fp8,0,1.704213301340739
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,1,128,1,float16,fp8,0,0.9142666657765707
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,1,128,1,float16,float16,0,0.9280213514963785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,16,128,1,float16,fp8,0,0.8946293195088705
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,16,128,1,float16,float16,0,0.9032959938049316
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,16,8,128,1,fp8,fp8,0,1.1846506595611572
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,16,128,1,fp8,fp8,0,0.6136053403218588
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,1,128,1,fp8,fp8,0,0.6079893509546915
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,2,128,1,float16,float16,0,0.9323360125223795
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,2,128,1,fp8,fp8,0,0.6099520126978556
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,2,128,1,float16,fp8,0,0.9169013500213623
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,4,128,1,float16,float16,0,0.9246880213419596
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,4,128,1,float16,fp8,0,0.917359987894694
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,8,128,1,float16,float16,0,0.9367466767628988
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,4,128,1,fp8,fp8,0,0.6124586661656698
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,8,128,1,float16,fp8,0,0.9244266351064047
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,16,8,128,1,fp8,fp8,0,0.621120015780131
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,1,128,1,fp8,fp8,0,5.79698117574056
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,1,128,1,float16,fp8,0,9.020010630289713
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,1,128,1,float16,float16,0,8.935797373453775
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,2,128,1,float16,fp8,0,8.826192220052084
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,2,128,1,float16,float16,0,8.945295969645182
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,2,128,1,fp8,fp8,0,5.92245356241862
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,4,128,1,float16,float16,0,8.867669423421225
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,4,128,1,float16,fp8,0,8.843674977620443
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,1,128,1,float16,float16,0,4.407157262166341
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,4,128,1,fp8,fp8,0,5.766223907470703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,16,128,1,float16,float16,0,4.1377973556518555
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,1,128,1,float16,fp8,0,4.18449592590332
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,16,128,1,float16,fp8,0,4.323829332987468
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,8,128,1,float16,float16,0,9.132432301839193
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,16,128,1,fp8,fp8,0,2.9355039596557617
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,8,128,1,fp8,fp8,0,6.108245213826497
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,1,128,1,fp8,fp8,0,2.8690719604492188
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,16,8,128,1,float16,fp8,0,9.019765218098959
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,2,128,1,float16,float16,0,4.305493354797363
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,2,128,1,float16,fp8,0,4.207066535949707
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,2,128,1,fp8,fp8,0,2.8841225306193032
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,4,128,1,float16,float16,0,4.467114766438802
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,4,128,1,float16,fp8,0,4.316922823588054
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,4,128,1,fp8,fp8,0,2.943669319152832
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,8,128,1,float16,float16,0,4.278479894002278
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,8,128,1,float16,fp8,0,4.314698537190755
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,1,128,1,float16,float16,0,2.1477227210998535
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,1,128,1,float16,fp8,0,2.149280071258545
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,16,128,1,float16,float16,0,2.159829298655192
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,16,8,128,1,fp8,fp8,0,2.945546785990397
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,1,128,1,fp8,fp8,0,1.4885867436726887
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,2,128,1,float16,float16,0,2.163632074991862
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,2,128,1,float16,fp8,0,2.1331040064493814
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,16,128,1,fp8,fp8,0,1.5239893595377605
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,2,128,1,fp8,fp8,0,1.4991040229797363
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,16,128,1,float16,fp8,0,2.1197546323140464
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,4,128,1,float16,float16,0,2.1816746393839517
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,4,128,1,float16,fp8,0,2.1320692698160806
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,4,128,1,fp8,fp8,0,1.5030345916748047
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,16,128,1,float16,float16,0,1.136362632115682
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,8,128,1,float16,float16,0,2.1885973612467446
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,8,128,1,fp8,fp8,0,1.5301814079284668
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,16,8,128,1,float16,fp8,0,2.160325368245443
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,1,128,1,float16,float16,0,1.1364586353302002
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,16,128,1,float16,fp8,0,1.118832031885783
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,1,128,1,float16,fp8,0,1.1192106405893962
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,16,128,1,fp8,fp8,0,0.8157653013865153
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,1,128,1,fp8,fp8,0,0.8042453130086263
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,2,128,1,float16,float16,0,1.148202657699585
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,2,128,1,fp8,fp8,0,0.807375987370809
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,4,128,1,float16,float16,0,1.1420479615529378
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,2,128,1,float16,fp8,0,1.1285226345062256
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,4,128,1,float16,fp8,0,1.129701296488444
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,4,128,1,fp8,fp8,0,0.8088586330413818
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,8,128,1,float16,float16,0,1.1606933275858562
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,16,128,1,float16,float16,0,0.6275146802266439
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,8,128,1,fp8,fp8,0,0.8212693532307943
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,16,8,128,1,float16,fp8,0,1.1484266916910808
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,16,128,1,float16,fp8,0,0.6214400132497152
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,16,128,1,fp8,fp8,0,0.42663466930389404
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,1,128,1,float16,fp8,0,0.6292159954706827
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,1,128,1,float16,float16,0,0.6387146711349487
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,1,128,1,fp8,fp8,0,0.42475732167561847
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,2,128,1,float16,float16,0,0.6423840125401815
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,2,128,1,float16,fp8,0,0.633242646853129
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,2,128,1,fp8,fp8,0,0.4273120164871216
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,4,128,1,float16,float16,0,0.6407359838485718
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,4,128,1,fp8,fp8,0,0.42691198984781903
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,4,128,1,float16,fp8,0,0.6345706780751547
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,8,128,1,float16,float16,0,0.6443573236465454
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,8,128,1,float16,fp8,0,0.641274650891622
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,16,8,128,1,fp8,fp8,0,0.4336479902267456
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,1,128,1,float16,float16,0,5.2753705978393555
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,1,128,1,fp8,fp8,0,3.4915361404418945
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,1,128,1,float16,fp8,0,5.091759999593099
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,2,128,1,float16,float16,0,5.177210807800293
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,2,128,1,float16,fp8,0,4.947237332661946
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,2,128,1,fp8,fp8,0,3.4543307622273765
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,4,128,1,float16,fp8,0,5.0616105397542315
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,4,128,1,float16,float16,0,5.245711962381999
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,1,128,1,float16,float16,0,2.523045380910238
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,4,128,1,fp8,fp8,0,3.565978686014811
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,16,128,1,float16,float16,0,2.512725353240967
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,16,128,1,float16,fp8,0,2.4974826176961265
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,8,128,1,float16,fp8,0,5.058575948079427
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,8,128,1,float16,float16,0,5.182191848754883
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,16,8,128,1,fp8,fp8,0,3.5481440226236978
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,16,128,1,fp8,fp8,0,1.8200747172037761
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,1,128,1,float16,fp8,0,2.489674727121989
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,1,128,1,fp8,fp8,0,1.7620959281921387
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,2,128,1,float16,float16,0,2.506063938140869
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,2,128,1,float16,fp8,0,2.4810239473978677
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,2,128,1,fp8,fp8,0,1.7734346389770508
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,4,128,1,float16,fp8,0,2.4666879971822104
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,4,128,1,float16,float16,0,2.510789394378662
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,4,128,1,fp8,fp8,0,1.7889173825581868
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,8,128,1,float16,float16,0,2.578458627065023
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,8,128,1,float16,fp8,0,2.5466133753458657
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,1,128,1,float16,float16,0,1.2932000160217285
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,1,128,1,float16,fp8,0,1.2821706930796306
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,16,128,1,float16,float16,0,1.2946293354034424
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,16,8,128,1,fp8,fp8,0,1.8206559816996257
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,1,128,1,fp8,fp8,0,0.9260373115539551
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,16,128,1,float16,fp8,0,1.3021759986877441
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,2,128,1,float16,float16,0,1.3041600386301677
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,16,128,1,fp8,fp8,0,0.9496906598409017
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,2,128,1,float16,fp8,0,1.2799200216929119
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,2,128,1,fp8,fp8,0,0.9300533135732015
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,4,128,1,float16,float16,0,1.3007520039876301
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,4,128,1,fp8,fp8,0,0.9370720386505127
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,4,128,1,float16,fp8,0,1.2909226417541504
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,8,128,1,float16,float16,0,1.3233973185221355
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,8,128,1,float16,fp8,0,1.312010685602824
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,16,128,1,float16,float16,0,0.694922685623169
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,16,8,128,1,fp8,fp8,0,0.9552799860636393
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,16,128,1,float16,fp8,0,0.6942133108774821
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,1,128,1,float16,float16,0,0.7039893468221029
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,16,128,1,fp8,fp8,0,0.5009440183639526
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,1,128,1,fp8,fp8,0,0.48155200481414795
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,1,128,1,float16,fp8,0,0.6908746560414633
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,2,128,1,float16,float16,0,0.7045813401540121
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,2,128,1,float16,fp8,0,0.6918079853057861
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,2,128,1,fp8,fp8,0,0.48230934143066406
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,4,128,1,float16,float16,0,0.7086026668548584
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,4,128,1,float16,fp8,0,0.700213352839152
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,4,128,1,fp8,fp8,0,0.4862026770909627
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,8,128,1,float16,float16,0,0.7132266362508138
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,8,128,1,fp8,fp8,0,0.4986720085144043
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,16,8,128,1,float16,fp8,0,0.7018559773763021
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,1,128,1,float16,float16,0,0.35438934961954754
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,16,128,1,float16,float16,0,0.35491732756296795
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,16,128,1,float16,fp8,0,0.3502933184305827
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,1,128,1,float16,fp8,0,0.3472213347752889
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,1,128,1,fp8,fp8,0,0.2728319962819417
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,16,128,1,fp8,fp8,0,0.27752000093460083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,2,128,1,float16,float16,0,0.355402668317159
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,2,128,1,fp8,fp8,0,0.27340267101923627
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,4,128,1,float16,fp8,0,0.34962666034698486
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,4,128,1,float16,float16,0,0.35530134042104083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,4,128,1,fp8,fp8,0,0.274127999941508
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,2,128,1,float16,fp8,0,0.3477226495742798
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,8,128,1,float16,fp8,0,0.35625600814819336
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,8,128,1,float16,float16,0,0.3611520131429036
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,16,8,128,1,fp8,fp8,0,0.27859199047088623
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,1,128,1,float16,fp8,0,4.696314811706543
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,1,128,1,fp8,fp8,0,3.481093406677246
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,1,128,1,float16,float16,0,5.071669260660808
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,2,128,1,float16,float16,0,4.832096099853516
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,2,128,1,fp8,fp8,0,3.509552001953125
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,2,128,1,float16,fp8,0,4.964000066121419
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,4,128,1,float16,float16,0,4.937893231709798
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,4,128,1,float16,fp8,0,4.946042696634929
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,1,128,1,float16,float16,0,2.4453066190083823
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,4,128,1,fp8,fp8,0,3.5475947062174478
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,16,128,1,float16,float16,0,2.4698239962259927
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,1,128,1,float16,fp8,0,2.402128060658773
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,16,128,1,float16,fp8,0,2.4563679695129395
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,16,128,1,fp8,fp8,0,1.8788746198018391
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,8,128,1,float16,float16,0,5.15119997660319
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,8,128,1,float16,fp8,0,5.201845486958821
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,1,128,1,fp8,fp8,0,1.7675786018371582
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,16,8,128,1,fp8,fp8,0,3.637968063354492
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,2,128,1,float16,float16,0,2.4439199765523276
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,2,128,1,float16,fp8,0,2.4125067392985025
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,2,128,1,fp8,fp8,0,1.7813706398010254
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,4,128,1,float16,float16,0,2.471834659576416
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,4,128,1,float16,fp8,0,2.411424001057943
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,4,128,1,fp8,fp8,0,1.8103466033935547
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,8,128,1,float16,fp8,0,2.4500746726989746
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,8,128,1,float16,float16,0,2.466357390085856
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,1,128,1,float16,float16,0,1.2401493390401204
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,16,128,1,float16,float16,0,1.272426684697469
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,1,128,1,float16,fp8,0,1.2331466674804688
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,16,8,128,1,fp8,fp8,0,1.8564000129699707
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,1,128,1,fp8,fp8,0,0.9117120107014974
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,16,128,1,float16,fp8,0,1.2556053002675374
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,16,128,1,fp8,fp8,0,0.9676960309346517
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,2,128,1,float16,float16,0,1.2416533629099529
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,2,128,1,float16,fp8,0,1.2263253529866536
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,2,128,1,fp8,fp8,0,0.9172586599985758
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,4,128,1,float16,float16,0,1.2634560267130535
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,4,128,1,float16,fp8,0,1.2376426855723064
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,4,128,1,fp8,fp8,0,0.9270773728688558
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,16,128,1,float16,float16,0,0.6634666522343954
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,16,128,1,float16,fp8,0,0.6649226744969686
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,8,128,1,fp8,fp8,0,0.9529866377512614
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,8,128,1,float16,fp8,0,1.2623519897460938
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,16,8,128,1,float16,float16,0,1.2692853609720867
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,16,128,1,fp8,fp8,0,0.5192106564839681
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,1,128,1,float16,float16,0,0.6608320077260336
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,1,128,1,float16,fp8,0,0.6499679883321127
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,1,128,1,fp8,fp8,0,0.4886026779810588
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,2,128,1,float16,fp8,0,0.6495306491851807
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,2,128,1,float16,float16,0,0.6638026634852091
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,2,128,1,fp8,fp8,0,0.49398934841156006
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,4,128,1,float16,float16,0,0.6626026630401611
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,4,128,1,float16,fp8,0,0.6582613388697306
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,4,128,1,fp8,fp8,0,0.5007839997609457
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,8,128,1,float16,float16,0,0.6727200349171957
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,8,128,1,float16,fp8,0,0.6617653369903564
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,16,128,1,float16,float16,0,0.3707253138224284
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,16,128,1,float16,fp8,0,0.36904533704121906
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,16,8,128,1,fp8,fp8,0,0.5099680026372274
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,1,128,1,float16,float16,0,0.36923734347025555
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,1,128,1,float16,fp8,0,0.3635626633961995
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,1,128,1,fp8,fp8,0,0.25993067026138306
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,2,128,1,float16,float16,0,0.3684426546096802
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,2,128,1,float16,fp8,0,0.3651786645253499
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,2,128,1,fp8,fp8,0,0.26068800687789917
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,16,128,1,fp8,fp8,0,0.27380265792210895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,4,128,1,float16,float16,0,0.370741327603658
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,4,128,1,float16,fp8,0,0.36796800295511883
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,4,128,1,fp8,fp8,0,0.26467732588450116
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,8,128,1,float16,float16,0,0.3757013479868571
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,8,128,1,float16,fp8,0,0.36982933680216473
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,16,128,1,float16,float16,0,0.1922453244527181
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,1,128,1,float16,float16,0,0.18749332427978516
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,16,8,128,1,fp8,fp8,0,0.2690986593564351
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,16,128,1,fp8,fp8,0,0.15759467085202536
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,1,128,1,float16,fp8,0,0.1848106582959493
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,16,128,1,float16,fp8,0,0.18814400831858316
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,1,128,1,fp8,fp8,0,0.14697600404421488
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,2,128,1,float16,float16,0,0.18634666999181113
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,2,128,1,float16,fp8,0,0.1856266657511393
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,2,128,1,fp8,fp8,0,0.1506666640440623
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,4,128,1,float16,float16,0,0.18902933597564697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,4,128,1,float16,fp8,0,0.18528532981872559
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,4,128,1,fp8,fp8,0,0.1529866655667623
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,8,128,1,float16,float16,0,0.19021866718928018
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,8,128,1,float16,fp8,0,0.18786134322484335
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,16,8,128,1,fp8,fp8,0,0.15628266334533691
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,1,128,1,float16,fp8,0,2.9410346349080405
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,1,128,1,float16,float16,0,2.9993600845336914
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,1,128,1,fp8,fp8,0,2.2243146896362305
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,2,128,1,float16,fp8,0,2.9309120178222656
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,2,128,1,fp8,fp8,0,2.2371412913004556
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,2,128,1,float16,float16,0,2.993920008341471
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,4,128,1,float16,float16,0,3.004864056905111
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,4,128,1,float16,fp8,0,2.951343854268392
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,1,128,1,float16,float16,0,1.4981652895609539
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,4,128,1,fp8,fp8,0,2.2760586738586426
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,16,128,1,float16,fp8,0,1.5137012799580891
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,16,128,1,float16,float16,0,1.5435093243916829
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,1,128,1,float16,fp8,0,1.4718772570292156
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,8,128,1,float16,float16,0,3.1187092463175454
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,8,128,1,float16,fp8,0,3.0280319849650064
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,16,128,1,fp8,fp8,0,1.2124906380971272
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,1,128,1,fp8,fp8,0,1.1327306429545085
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,16,8,128,1,fp8,fp8,0,2.3422239621480307
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,2,128,1,float16,float16,0,1.5010933876037598
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,2,128,1,float16,fp8,0,1.4773707389831543
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,2,128,1,fp8,fp8,0,1.1356746355692546
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,4,128,1,float16,fp8,0,1.4985599517822266
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,4,128,1,float16,float16,0,1.527349313100179
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,4,128,1,fp8,fp8,0,1.156549294789632
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,8,128,1,float16,float16,0,1.5544053713480632
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,1,128,1,float16,float16,0,0.7817546526590983
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,1,128,1,float16,fp8,0,0.7629333337148031
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,8,128,1,float16,fp8,0,1.522223949432373
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,16,128,1,float16,fp8,0,0.7871893246968588
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,16,128,1,float16,float16,0,0.7959360281626383
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,1,128,1,fp8,fp8,0,0.5911680062611898
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,16,8,128,1,fp8,fp8,0,1.1901600360870361
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,16,128,1,fp8,fp8,0,0.6341439882914225
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,2,128,1,float16,float16,0,0.7851839860280355
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,2,128,1,float16,fp8,0,0.7660640080769857
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,2,128,1,fp8,fp8,0,0.5959733327229818
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,4,128,1,float16,float16,0,0.7860533396402994
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,4,128,1,float16,fp8,0,0.7728373209635416
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,4,128,1,fp8,fp8,0,0.6042346556981405
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,16,128,1,float16,float16,0,0.4254453182220459
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,8,128,1,float16,fp8,0,0.7895039717356364
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,8,128,1,float16,float16,0,0.7949333190917969
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,16,8,128,1,fp8,fp8,0,0.6208320061365763
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,16,128,1,float16,fp8,0,0.4256746768951416
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,1,128,1,float16,float16,0,0.418229341506958
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,1,128,1,float16,fp8,0,0.4129759867986043
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,1,128,1,fp8,fp8,0,0.3083946704864502
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,16,128,1,fp8,fp8,0,0.34251201152801514
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,2,128,1,float16,float16,0,0.42204801241556805
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,2,128,1,float16,fp8,0,0.4130026499430339
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,2,128,1,fp8,fp8,0,0.3120159904162089
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,4,128,1,fp8,fp8,0,0.315829336643219
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,4,128,1,float16,float16,0,0.4275999863942464
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,4,128,1,float16,fp8,0,0.41697601477305096
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,8,128,1,float16,float16,0,0.4284213383992513
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,8,128,1,float16,fp8,0,0.4238933324813843
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,16,128,1,float16,float16,0,0.22154132525126138
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,16,8,128,1,fp8,fp8,0,0.32791467507680255
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,16,128,1,float16,fp8,0,0.22158400217692056
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,1,128,1,float16,float16,0,0.2154560089111328
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,1,128,1,float16,fp8,0,0.20994667212168375
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,16,128,1,fp8,fp8,0,0.18508267402648926
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,1,128,1,fp8,fp8,0,0.17161067326863608
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,2,128,1,float16,float16,0,0.21554666757583618
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,2,128,1,float16,fp8,0,0.21082667509714761
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,2,128,1,fp8,fp8,0,0.1732693314552307
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,4,128,1,float16,float16,0,0.2183039983113607
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,4,128,1,float16,fp8,0,0.21369065841039023
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,4,128,1,fp8,fp8,0,0.17515732844670615
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,8,128,1,float16,float16,0,0.22196267048517862
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,8,128,1,float16,fp8,0,0.2174773414929708
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,16,8,128,1,fp8,fp8,0,0.180293341477712
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,16,128,1,float16,float16,0,0.1292586624622345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,16,128,1,float16,fp8,0,0.12865599989891052
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,1,128,1,float16,float16,0,0.12786133090655008
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,16,128,1,fp8,fp8,0,0.11090133587519328
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,1,128,1,fp8,fp8,0,0.10135466853777568
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,1,128,1,float16,fp8,0,0.12463466326395671
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,2,128,1,float16,float16,0,0.12710932890574136
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,2,128,1,float16,fp8,0,0.12527466813723245
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,2,128,1,fp8,fp8,0,0.10139200091362
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,4,128,1,float16,float16,0,0.12723732988039652
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,4,128,1,float16,fp8,0,0.12658666570981345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,4,128,1,fp8,fp8,0,0.10412800312042236
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,8,128,1,float16,float16,0,0.12962133685747781
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,8,128,1,fp8,fp8,0,0.10849066575368245
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,16,8,128,1,float16,fp8,0,0.12693867087364197
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,1,128,1,float16,float16,0,3.1347999572753906
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,1,128,1,float16,fp8,0,3.0098934173583984
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,1,128,1,fp8,fp8,0,2.450175921122233
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,2,128,1,float16,float16,0,3.176911989847819
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,2,128,1,float16,fp8,0,3.0616321563720703
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,2,128,1,fp8,fp8,0,2.472853342692057
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,4,128,1,float16,float16,0,3.1834561030069985
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,4,128,1,float16,fp8,0,3.0657599767049155
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,1,128,1,float16,float16,0,1.5663785934448242
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,4,128,1,fp8,fp8,0,2.4925759633382163
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,16,128,1,float16,float16,0,1.6558079719543457
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,16,128,1,float16,fp8,0,1.617626667022705
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,16,128,1,fp8,fp8,0,1.3501386642456055
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,8,128,1,float16,float16,0,3.262319882710775
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,1,128,1,float16,fp8,0,1.5196320215861003
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,1,128,1,fp8,fp8,0,1.232149362564087
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,8,128,1,float16,fp8,0,3.2144479751586914
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,16,8,128,1,fp8,fp8,0,2.5806239446004233
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,2,128,1,float16,float16,0,1.5811840693155925
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,2,128,1,float16,fp8,0,1.5390079816182454
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,2,128,1,fp8,fp8,0,1.2421066761016846
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,4,128,1,float16,float16,0,1.5833172798156738
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,4,128,1,float16,fp8,0,1.5447146097819011
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,4,128,1,fp8,fp8,0,1.2605706850687664
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,16,128,1,float16,float16,0,0.8349546591440836
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,8,128,1,float16,float16,0,1.6329654057820637
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,8,128,1,float16,fp8,0,1.5827679634094238
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,1,128,1,float16,fp8,0,0.7777226765950521
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,1,128,1,float16,float16,0,0.794490655263265
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,16,8,128,1,fp8,fp8,0,1.3100159962972004
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,16,128,1,float16,fp8,0,0.8178826967875162
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,16,128,1,fp8,fp8,0,0.6937493483225504
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,1,128,1,fp8,fp8,0,0.6322240034739176
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,2,128,1,float16,fp8,0,0.7850879828135172
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,2,128,1,float16,float16,0,0.8010666370391846
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,2,128,1,fp8,fp8,0,0.6350986560185751
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,4,128,1,float16,float16,0,0.8046773274739584
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,4,128,1,float16,fp8,0,0.793183962504069
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,4,128,1,fp8,fp8,0,0.6466879844665527
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,1,128,1,float16,float16,0,0.4214613437652588
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,16,128,1,float16,float16,0,0.4373706579208374
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,8,128,1,float16,float16,0,0.8177119890848795
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,16,128,1,float16,fp8,0,0.43038400014241535
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,8,128,1,float16,fp8,0,0.8106506665547689
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,16,8,128,1,fp8,fp8,0,0.6700053215026855
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,16,128,1,fp8,fp8,0,0.368010679880778
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,1,128,1,float16,fp8,0,0.411631981531779
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,1,128,1,fp8,fp8,0,0.3359520037968953
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,2,128,1,float16,float16,0,0.4234773317972819
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,2,128,1,float16,fp8,0,0.4123946825663249
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,2,128,1,fp8,fp8,0,0.33921066919962567
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,4,128,1,float16,float16,0,0.427562673886617
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,4,128,1,fp8,fp8,0,0.3437066475550334
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,4,128,1,float16,fp8,0,0.4191360076268514
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,8,128,1,float16,float16,0,0.4328800042470296
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,8,128,1,float16,fp8,0,0.4284426768620809
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,16,128,1,float16,float16,0,0.2395626703898112
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,16,8,128,1,fp8,fp8,0,0.3545546531677246
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,1,128,1,float16,float16,0,0.23111999034881592
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,1,128,1,float16,fp8,0,0.22806400060653687
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,16,128,1,float16,fp8,0,0.23856000105539957
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,16,128,1,fp8,fp8,0,0.1992959976196289
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,1,128,1,fp8,fp8,0,0.17760533094406128
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,2,128,1,float16,float16,0,0.23513599236806235
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,2,128,1,float16,fp8,0,0.22827200094858804
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,2,128,1,fp8,fp8,0,0.17781333128611246
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,4,128,1,float16,fp8,0,0.2330133318901062
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,4,128,1,float16,float16,0,0.23531200488408408
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,4,128,1,fp8,fp8,0,0.18156800667444864
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,8,128,1,float16,float16,0,0.2410879929860433
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,8,128,1,float16,fp8,0,0.23718400796254477
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,16,8,128,1,fp8,fp8,0,0.18897066513697305
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,16,128,1,float16,float16,0,0.12552533547083536
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,1,128,1,float16,float16,0,0.11936533451080322
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,16,128,1,float16,fp8,0,0.12541866302490234
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,16,128,1,fp8,fp8,0,0.11160000165303548
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,1,128,1,float16,fp8,0,0.1169599990049998
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,1,128,1,fp8,fp8,0,0.09893332918485005
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,2,128,1,float16,float16,0,0.11968533198038737
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,2,128,1,float16,fp8,0,0.11818666259447734
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,2,128,1,fp8,fp8,0,0.10187733173370361
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,4,128,1,float16,float16,0,0.12134400010108948
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,4,128,1,float16,fp8,0,0.12044266859690349
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,8,128,1,float16,fp8,0,0.12222933769226074
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,4,128,1,fp8,fp8,0,0.10450133681297302
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,8,128,1,float16,float16,0,0.12383466958999634
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,16,8,128,1,fp8,fp8,0,0.10810133814811707
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,16,128,1,float16,float16,0,0.07439466814200084
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,16,128,1,float16,fp8,0,0.07398933172225952
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,1,128,1,float16,float16,0,0.0727893312772115
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,16,128,1,fp8,fp8,0,0.06958400209744771
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,1,128,1,float16,fp8,0,0.07125333448251088
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,2,128,1,float16,float16,0,0.07256533205509186
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,1,128,1,fp8,fp8,0,0.06208533545335134
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,2,128,1,float16,fp8,0,0.0710399995247523
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,2,128,1,fp8,fp8,0,0.06287999947865804
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,4,128,1,float16,float16,0,0.07242133220036824
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,4,128,1,float16,fp8,0,0.07154666880766551
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,4,128,1,fp8,fp8,0,0.06307200094064076
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,8,128,1,float16,float16,0,0.07339199880758922
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,8,128,1,float16,fp8,0,0.0721013347307841
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,16,8,128,1,fp8,fp8,0,0.06498666604359944
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,1,128,1,float16,float16,0,2.055898666381836
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,1,128,1,float16,fp8,0,1.9461493492126465
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,1,128,1,fp8,fp8,0,1.6709334055582683
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,2,128,1,float16,float16,0,2.0712480545043945
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,2,128,1,fp8,fp8,0,1.6848586400349934
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,2,128,1,float16,fp8,0,1.9538933436075847
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,4,128,1,float16,float16,0,2.089008013407389
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,4,128,1,float16,fp8,0,1.9752960205078125
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,1,128,1,float16,float16,0,1.024901310602824
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,4,128,1,fp8,fp8,0,1.6978400548299153
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,16,128,1,float16,float16,0,1.100826660792033
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,16,128,1,float16,fp8,0,1.0543200174967449
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,8,128,1,float16,float16,0,2.16321070988973
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,16,128,1,fp8,fp8,0,0.9345280329386393
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,1,128,1,float16,fp8,0,0.9874080022176107
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,8,128,1,float16,fp8,0,2.013498624165853
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,1,128,1,fp8,fp8,0,0.8385012944539388
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,16,8,128,1,fp8,fp8,0,1.7583573659261067
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,2,128,1,float16,float16,0,1.0297173659006755
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,2,128,1,float16,fp8,0,0.9913653532663981
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,2,128,1,fp8,fp8,0,0.8488746484120687
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,4,128,1,float16,float16,0,1.0404799779256184
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,4,128,1,float16,fp8,0,1.0016586780548096
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,4,128,1,fp8,fp8,0,0.8589173158009847
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,8,128,1,float16,float16,0,1.0681227048238118
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,8,128,1,float16,fp8,0,1.0276319980621338
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,1,128,1,float16,float16,0,0.5244746605555216
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,1,128,1,float16,fp8,0,0.5081066687901815
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,16,128,1,float16,float16,0,0.5511999924977621
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,16,128,1,float16,fp8,0,0.5472586552302042
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,16,8,128,1,fp8,fp8,0,0.8963253498077393
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,1,128,1,fp8,fp8,0,0.4335840145746867
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,16,128,1,fp8,fp8,0,0.4843253294626872
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,2,128,1,float16,float16,0,0.5276266733805338
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,2,128,1,float16,fp8,0,0.5135840177536011
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,2,128,1,fp8,fp8,0,0.43537068367004395
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,4,128,1,float16,float16,0,0.5307626724243164
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,4,128,1,float16,fp8,0,0.5208213329315186
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,4,128,1,fp8,fp8,0,0.44673601786295575
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,8,128,1,float16,float16,0,0.5440906683603922
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,16,128,1,float16,float16,0,0.2947840094566345
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,16,128,1,float16,fp8,0,0.2897653381029765
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,8,128,1,float16,fp8,0,0.5313973426818848
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,16,8,128,1,fp8,fp8,0,0.4607306718826294
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,1,128,1,float16,float16,0,0.28134934107462567
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,1,128,1,float16,fp8,0,0.2739040056864421
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,16,128,1,fp8,fp8,0,0.25893332560857135
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,1,128,1,fp8,fp8,0,0.22300267219543457
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,2,128,1,float16,float16,0,0.28257066011428833
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,2,128,1,float16,fp8,0,0.2751520077387492
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,2,128,1,fp8,fp8,0,0.22479466597239176
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,4,128,1,float16,float16,0,0.28563199440638226
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,4,128,1,float16,fp8,0,0.279968003431956
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,4,128,1,fp8,fp8,0,0.22993600368499756
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,8,128,1,float16,float16,0,0.29016000032424927
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,8,128,1,float16,fp8,0,0.2851039965947469
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,1,128,1,float16,float16,0,0.14773333072662354
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,16,128,1,float16,fp8,0,0.15626133481661478
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,16,128,1,float16,float16,0,0.15737066666285196
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,1,128,1,float16,fp8,0,0.14434666434923807
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,16,8,128,1,fp8,fp8,0,0.24217599630355835
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,1,128,1,fp8,fp8,0,0.12402666608492534
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,2,128,1,float16,float16,0,0.147407998641332
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,16,128,1,fp8,fp8,0,0.13953066865603128
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,2,128,1,float16,fp8,0,0.14603733023007712
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,2,128,1,fp8,fp8,0,0.12528533736864725
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,4,128,1,float16,float16,0,0.14909332990646362
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,4,128,1,float16,fp8,0,0.14693333705266318
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,4,128,1,fp8,fp8,0,0.12827199697494507
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,8,128,1,float16,float16,0,0.15334399541219076
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,1,128,1,float16,float16,0,0.08293866614500682
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,1,128,1,float16,fp8,0,0.08171199758847554
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,16,128,1,float16,float16,0,0.08942932883898418
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,16,128,1,fp8,fp8,0,0.0819893330335617
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,16,128,1,float16,fp8,0,0.08757332960764568
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,8,128,1,float16,fp8,0,0.15058133006095886
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,1,128,1,fp8,fp8,0,0.07043733199437459
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,16,8,128,1,fp8,fp8,0,0.13274666666984558
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,2,128,1,float16,float16,0,0.08335999647776286
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,2,128,1,fp8,fp8,0,0.07161599894364674
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,2,128,1,float16,fp8,0,0.08185600241025288
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,4,128,1,float16,fp8,0,0.08232533435026805
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,4,128,1,float16,float16,0,0.0839519997437795
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,4,128,1,fp8,fp8,0,0.07358933488527934
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,8,128,1,float16,float16,0,0.08618133266766866
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,8,128,1,float16,fp8,0,0.08592533071835835
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,16,8,128,1,fp8,fp8,0,0.07842133442560832
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,16,128,1,float16,float16,0,0.054117331902186074
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,16,128,1,float16,fp8,0,0.053914666175842285
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,1,128,1,float16,float16,0,0.05394133428732554
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,1,128,1,float16,fp8,0,0.05299733579158783
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,16,128,1,fp8,fp8,0,0.05000533163547516
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,1,128,1,fp8,fp8,0,0.0470773329337438
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,2,128,1,float16,fp8,0,0.05354666709899902
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,2,128,1,fp8,fp8,0,0.04776533444722494
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,4,128,1,float16,float16,0,0.05433600147565206
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,4,128,1,float16,fp8,0,0.05310933291912079
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,4,128,1,fp8,fp8,0,0.0480373352766037
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,8,128,1,float16,float16,0,0.054085334142049156
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,2,128,1,float16,float16,0,0.054330666859944664
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,8,128,1,float16,fp8,0,0.05349333087603251
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,16,8,128,1,fp8,fp8,0,0.04939733445644379
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,1,128,1,float16,float16,0,1.9028426806132
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,1,128,1,fp8,fp8,0,1.6451466878255208
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,2,128,1,float16,float16,0,1.9382079442342122
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,1,128,1,float16,fp8,0,1.8922826449076335
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,2,128,1,float16,fp8,0,1.9188906351725261
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,2,128,1,fp8,fp8,0,1.726869265238444
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,4,128,1,float16,float16,0,2.0389866828918457
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,4,128,1,float16,fp8,0,2.070746739705404
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,1,128,1,float16,float16,0,0.9651412963867188
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,4,128,1,fp8,fp8,0,1.876421292622884
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,16,128,1,float16,float16,0,1.1066986719767253
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,16,128,1,float16,fp8,0,1.1030720074971516
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,16,128,1,fp8,fp8,0,0.959013303120931
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,1,128,1,float16,fp8,0,0.9615253607432047
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,8,128,1,float16,float16,0,2.1026506423950195
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,8,128,1,float16,fp8,0,2.115071932474772
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,16,8,128,1,fp8,fp8,0,1.883994738260905
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,1,128,1,fp8,fp8,0,0.8240640163421631
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,2,128,1,float16,float16,0,0.9719146887461344
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,2,128,1,float16,fp8,0,0.9701279799143473
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,2,128,1,fp8,fp8,0,0.8635040124257406
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,4,128,1,float16,float16,0,1.0285226504007976
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,4,128,1,float16,fp8,0,1.0229653517405193
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,4,128,1,fp8,fp8,0,0.9487199783325195
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,8,128,1,float16,fp8,0,1.0689386526743572
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,8,128,1,float16,float16,0,1.0463626384735107
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,1,128,1,float16,fp8,0,0.4944426616032918
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,1,128,1,float16,float16,0,0.494704008102417
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,16,128,1,float16,float16,0,0.5602720181147257
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,16,128,1,float16,fp8,0,0.5529173215230306
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,16,8,128,1,fp8,fp8,0,0.9510293006896973
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,1,128,1,fp8,fp8,0,0.42029333114624023
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,2,128,1,float16,float16,0,0.4997386535008748
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,2,128,1,float16,fp8,0,0.49933334191640216
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,2,128,1,fp8,fp8,0,0.4386986494064331
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,16,128,1,fp8,fp8,0,0.49243732293446857
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,4,128,1,float16,float16,0,0.5179733435312907
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,4,128,1,fp8,fp8,0,0.48255467414855957
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,4,128,1,float16,fp8,0,0.5126986503601074
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,8,128,1,float16,float16,0,0.5267786582310995
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,16,128,1,float16,float16,0,0.29004265864690143
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,1,128,1,float16,float16,0,0.26126933097839355
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,8,128,1,float16,fp8,0,0.5378773212432861
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,1,128,1,float16,fp8,0,0.2584906617800395
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,16,128,1,float16,fp8,0,0.2863626678784688
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,16,8,128,1,fp8,fp8,0,0.4886346658070882
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,16,128,1,fp8,fp8,0,0.2557813326517741
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,1,128,1,fp8,fp8,0,0.219925324122111
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,2,128,1,float16,float16,0,0.26160534222920734
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,2,128,1,float16,fp8,0,0.26174400250116986
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,2,128,1,fp8,fp8,0,0.22825600703557333
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,4,128,1,float16,float16,0,0.26979732513427734
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,4,128,1,float16,fp8,0,0.2707466681798299
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,4,128,1,fp8,fp8,0,0.25151999791463214
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,16,128,1,float16,float16,0,0.1588159998257955
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,8,128,1,float16,float16,0,0.2744906743367513
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,8,128,1,float16,fp8,0,0.27684799830118817
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,16,128,1,float16,fp8,0,0.15688533584276834
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,1,128,1,float16,float16,0,0.14380266269048056
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,1,128,1,float16,fp8,0,0.14202666282653809
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,16,128,1,fp8,fp8,0,0.12994666894276938
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,16,8,128,1,fp8,fp8,0,0.25377599398295086
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,1,128,1,fp8,fp8,0,0.11213333408037822
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,2,128,1,fp8,fp8,0,0.11425066987673442
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,2,128,1,float16,fp8,0,0.14443733294804892
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,2,128,1,float16,float16,0,0.14458666245142618
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,4,128,1,float16,fp8,0,0.1483519971370697
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,4,128,1,fp8,fp8,0,0.12518399953842163
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,4,128,1,float16,float16,0,0.14937067031860352
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,8,128,1,float16,float16,0,0.15219199657440186
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,1,128,1,float16,float16,0,0.07373866438865662
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,16,128,1,float16,float16,0,0.08405866225560506
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,8,128,1,float16,fp8,0,0.14991999665896097
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,16,128,1,float16,fp8,0,0.08246399958928426
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,16,128,1,fp8,fp8,0,0.07160533467928569
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,16,8,128,1,fp8,fp8,0,0.12710400422414145
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,1,128,1,float16,fp8,0,0.07507733503977458
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,1,128,1,fp8,fp8,0,0.06169599791367849
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,2,128,1,float16,float16,0,0.07481599847475688
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,2,128,1,float16,fp8,0,0.07588266829649608
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,2,128,1,fp8,fp8,0,0.06260799864927928
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,4,128,1,float16,fp8,0,0.07813333471616109
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,4,128,1,fp8,fp8,0,0.06909866631031036
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,4,128,1,float16,float16,0,0.0780213326215744
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,8,128,1,float16,fp8,0,0.07909333209196727
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,8,128,1,float16,float16,0,0.07959466675917308
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,16,128,1,float16,float16,0,0.04660800099372864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,1,128,1,float16,float16,0,0.042506664991378784
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,1,128,1,float16,fp8,0,0.04247466723124186
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,16,8,128,1,fp8,fp8,0,0.06930133203665416
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,16,128,1,float16,fp8,0,0.0459199994802475
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,16,128,1,fp8,fp8,0,0.043391997615496315
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,2,128,1,float16,float16,0,0.04298666616280874
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,1,128,1,fp8,fp8,0,0.03629333277543386
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,2,128,1,fp8,fp8,0,0.03728000074625015
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,4,128,1,float16,float16,0,0.04398933549722036
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,4,128,1,float16,fp8,0,0.04390400151411692
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,2,128,1,float16,fp8,0,0.04312533140182495
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,4,128,1,fp8,fp8,0,0.03955200066169103
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,8,128,1,float16,float16,0,0.04389866689840952
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,8,128,1,float16,fp8,0,0.04435733457406362
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,16,8,128,1,fp8,fp8,0,0.039850667119026184
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,16,128,1,float16,float16,0,0.03286933402220408
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,1,128,1,float16,float16,0,0.032058666149775185
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,16,128,1,float16,fp8,0,0.032485333581765495
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,16,128,1,fp8,fp8,0,0.028880000114440918
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,1,128,1,float16,fp8,0,0.032085334261258446
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,1,128,1,fp8,fp8,0,0.02731200059254964
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,2,128,1,float16,float16,0,0.032074667513370514
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,2,128,1,float16,fp8,0,0.032325332363446556
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,2,128,1,fp8,fp8,0,0.027957332630952198
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,4,128,1,float16,float16,0,0.03242133309443792
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,4,128,1,fp8,fp8,0,0.028815999627113342
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,8,128,1,float16,fp8,0,0.03294933338960012
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,8,128,1,float16,float16,0,0.03292799989382426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,4,128,1,float16,fp8,0,0.03275733441114426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,16,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,1,128,1,float16,float16,0,1.4822400410970051
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,1,128,1,fp8,fp8,0,1.368293285369873
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,1,128,1,float16,fp8,0,1.4761120478312175
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,2,128,1,float16,float16,0,1.5097920099894206
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,2,128,1,fp8,fp8,0,1.4784587224324544
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,2,128,1,float16,fp8,0,1.5144106547037761
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,4,128,1,float16,float16,0,1.6279786427815754
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,4,128,1,float16,fp8,0,1.649818738301595
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,4,128,1,fp8,fp8,0,1.605237325032552
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,1,128,1,float16,float16,0,0.7551626364390055
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,16,128,1,float16,float16,0,0.9124053319295248
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,16,128,1,float16,fp8,0,0.89738663037618
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,16,128,1,fp8,fp8,0,0.8371093273162842
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,8,128,1,float16,float16,0,1.6912533442179363
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,8,128,1,fp8,fp8,0,1.6101439793904622
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,1,128,1,float16,fp8,0,0.7475146452585856
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,16,8,128,1,float16,fp8,0,1.7066399256388347
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,1,128,1,fp8,fp8,0,0.6872159639994303
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,2,128,1,float16,float16,0,0.7619626522064209
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,2,128,1,float16,fp8,0,0.75874129931132
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,2,128,1,fp8,fp8,0,0.7327093283335367
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,4,128,1,float16,float16,0,0.8193066914876302
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,4,128,1,float16,fp8,0,0.818997303644816
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,4,128,1,fp8,fp8,0,0.8077600002288818
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,16,128,1,float16,float16,0,0.46215999126434326
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,1,128,1,float16,float16,0,0.38518933455149335
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,1,128,1,float16,fp8,0,0.3842506806055705
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,8,128,1,float16,float16,0,0.8353866736094157
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,8,128,1,fp8,fp8,0,0.8106186389923096
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,16,128,1,float16,fp8,0,0.4515519936879476
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,16,8,128,1,float16,fp8,0,0.8542826970418295
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,16,128,1,fp8,fp8,0,0.4235999981562297
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,1,128,1,fp8,fp8,0,0.3463519811630249
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,2,128,1,float16,fp8,0,0.3906559944152832
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,2,128,1,float16,float16,0,0.3899093468983968
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,2,128,1,fp8,fp8,0,0.37068267663319904
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,4,128,1,float16,fp8,0,0.40559999148050946
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,4,128,1,float16,float16,0,0.4078506628672282
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,4,128,1,fp8,fp8,0,0.41703466574350995
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,8,128,1,float16,float16,0,0.41847999890645343
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,1,128,1,float16,float16,0,0.20308800538380942
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,1,128,1,float16,fp8,0,0.20434133211771646
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,8,128,1,float16,fp8,0,0.42712001005808514
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,16,128,1,float16,fp8,0,0.23466134071350098
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,16,8,128,1,fp8,fp8,0,0.41677331924438477
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,16,128,1,fp8,fp8,0,0.2200373411178589
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,1,128,1,fp8,fp8,0,0.1821546753247579
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,16,128,1,float16,float16,0,0.2384106715520223
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,2,128,1,float16,float16,0,0.20595200856526694
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,2,128,1,float16,fp8,0,0.20722667376200357
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,2,128,1,fp8,fp8,0,0.19089599450429282
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,4,128,1,fp8,fp8,0,0.21303999423980713
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,4,128,1,float16,fp8,0,0.2120586633682251
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,4,128,1,float16,float16,0,0.21473066012064615
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,8,128,1,float16,float16,0,0.220634659131368
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,8,128,1,float16,fp8,0,0.21782400210698447
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,16,8,128,1,fp8,fp8,0,0.21464000145594278
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,1,128,1,float16,float16,0,0.11173866192499797
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,1,128,1,float16,fp8,0,0.11142933368682861
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,16,128,1,float16,float16,0,0.1301706631978353
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,16,128,1,float16,fp8,0,0.12727466225624084
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,16,128,1,fp8,fp8,0,0.11450133721033733
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,1,128,1,fp8,fp8,0,0.09443199634552002
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,2,128,1,float16,float16,0,0.11337600151697795
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,2,128,1,float16,fp8,0,0.11306132872899373
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,2,128,1,fp8,fp8,0,0.09617599844932556
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,4,128,1,float16,float16,0,0.11898666620254517
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,4,128,1,fp8,fp8,0,0.10724799831708272
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,8,128,1,float16,float16,0,0.12070932984352112
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,4,128,1,float16,fp8,0,0.1185653309027354
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,8,128,1,float16,fp8,0,0.1202186644077301
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,16,128,1,float16,float16,0,0.07162133355935414
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,1,128,1,float16,float16,0,0.06113600234190623
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,16,8,128,1,fp8,fp8,0,0.10898666580518086
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,16,128,1,float16,fp8,0,0.06957333286603291
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,16,128,1,fp8,fp8,0,0.06442133088906606
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,1,128,1,float16,fp8,0,0.06098133325576782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,2,128,1,float16,float16,0,0.06256533165772755
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,2,128,1,float16,fp8,0,0.06225599845250448
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,1,128,1,fp8,fp8,0,0.05199466645717621
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,2,128,1,fp8,fp8,0,0.05557866891225179
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,4,128,1,float16,float16,0,0.06453866759936015
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,4,128,1,fp8,fp8,0,0.061093335350354515
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,4,128,1,float16,fp8,0,0.06462400158246358
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,8,128,1,float16,float16,0,0.06690133114655812
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,8,128,1,float16,fp8,0,0.06676266590754192
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,16,128,1,float16,fp8,0,0.040565334260463715
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,1,128,1,float16,float16,0,0.03642666588226954
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,1,128,1,float16,fp8,0,0.036544000109036766
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,16,8,128,1,fp8,fp8,0,0.06136533121267954
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,16,128,1,float16,float16,0,0.03995733211437861
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,1,128,1,fp8,fp8,0,0.03193599979082743
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,16,128,1,fp8,fp8,0,0.038405333956082664
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,2,128,1,float16,float16,0,0.036618667344252266
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,2,128,1,float16,fp8,0,0.03702399879693985
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,2,128,1,fp8,fp8,0,0.03315199911594391
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,4,128,1,float16,float16,0,0.037802666425704956
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,4,128,1,float16,fp8,0,0.0378506655494372
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,8,128,1,float16,float16,0,0.03781333317359289
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,8,128,1,fp8,fp8,0,0.035973332822322845
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,8,128,1,float16,fp8,0,0.03786666691303253
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,16,4,128,1,fp8,fp8,0,0.035455999275048576
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,16,128,1,float16,fp8,0,0.02665599932273229
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,1,128,1,float16,float16,0,0.025125332176685333
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,16,128,1,fp8,fp8,0,0.02493866781393687
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,1,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,16,128,1,float16,float16,0,0.026591998835404713
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,1,128,1,fp8,fp8,0,0.02332266668478648
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,4,128,1,float16,float16,0,0.0259253333012263
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,2,128,1,fp8,fp8,0,0.023749334116776783
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,2,128,1,float16,float16,0,0.025205334027608235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,4,128,1,float16,fp8,0,0.026309333741664886
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,4,128,1,fp8,fp8,0,0.024512000381946564
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,2,128,1,float16,fp8,0,0.025674665967623394
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,8,128,1,float16,fp8,0,0.026501332720120747
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,8,128,1,fp8,fp8,0,0.025226667523384094
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,16,128,1,float16,float16,0,0.023237332701683044
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,16,8,128,1,float16,float16,0,0.025983999172846477
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,16,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,16,128,1,fp8,fp8,0,0.020992000897725422
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,1,128,1,float16,float16,0,0.02236266682545344
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,2,128,1,float16,float16,0,0.022837333381175995
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,1,128,1,fp8,fp8,0,0.020469332734743755
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,2,128,1,float16,fp8,0,0.022815999885400135
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,1,128,1,float16,fp8,0,0.02258133391539256
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,4,128,1,float16,float16,0,0.02275199939807256
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,2,128,1,fp8,fp8,0,0.020538666596015293
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,4,128,1,float16,fp8,0,0.023311999936898548
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,4,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,8,128,1,float16,float16,0,0.023029332359631855
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,8,128,1,float16,fp8,0,0.02306666721900304
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,16,8,128,1,fp8,fp8,0,0.02123733361562093
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,1,128,1,float16,fp8,0,0.6410773197809855
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,2,128,1,float16,float16,0,0.6573866605758667
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,2,128,1,float16,fp8,0,0.6506506601969401
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,1,128,1,float16,float16,0,0.6480480035146078
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,1,128,1,fp8,fp8,0,0.5626293420791626
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,2,128,1,fp8,fp8,0,0.6255199909210205
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,4,128,1,float16,float16,0,0.71123735109965
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,4,128,1,float16,fp8,0,0.711306651433309
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,1,128,1,float16,float16,0,0.3331039945284526
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,4,128,1,fp8,fp8,0,0.701088031133016
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,16,128,1,float16,fp8,0,0.4028640190760295
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,16,128,1,fp8,fp8,0,0.3696959813435872
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,8,128,1,float16,float16,0,0.734229326248169
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,1,128,1,float16,fp8,0,0.3313173254330953
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,16,128,1,float16,float16,0,0.413642684618632
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,8,128,1,float16,fp8,0,0.7385066350301107
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,1,128,1,fp8,fp8,0,0.28779733180999756
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,16,8,128,1,fp8,fp8,0,0.7227520147959391
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,2,128,1,float16,float16,0,0.33779199918111164
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,2,128,1,fp8,fp8,0,0.31010133028030396
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,2,128,1,float16,fp8,0,0.3352320194244385
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,4,128,1,float16,float16,0,0.35423465569814044
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,4,128,1,float16,fp8,0,0.35382401943206787
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,4,128,1,fp8,fp8,0,0.3593653440475464
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,8,128,1,float16,fp8,0,0.3693813482920329
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,1,128,1,float16,fp8,0,0.17453332742055258
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,8,128,1,float16,float16,0,0.3604533274968465
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,1,128,1,float16,float16,0,0.17537067333857217
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,16,128,1,float16,float16,0,0.21388266483942667
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,16,128,1,float16,fp8,0,0.2100320061047872
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,16,8,128,1,fp8,fp8,0,0.368618647257487
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,16,128,1,fp8,fp8,0,0.19277334213256836
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,2,128,1,float16,float16,0,0.17769600947697958
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,2,128,1,float16,fp8,0,0.17669866482416788
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,4,128,1,float16,float16,0,0.18530666828155518
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,2,128,1,fp8,fp8,0,0.16448000073432922
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,4,128,1,float16,fp8,0,0.1848213275273641
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,1,128,1,fp8,fp8,0,0.1544533371925354
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,4,128,1,fp8,fp8,0,0.18661334117253622
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,8,128,1,float16,float16,0,0.19155732790629068
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,1,128,1,float16,float16,0,0.09710400303204854
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,1,128,1,float16,fp8,0,0.09660800298055013
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,16,128,1,float16,float16,0,0.11741866668065389
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,8,128,1,float16,fp8,0,0.1890559991200765
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,16,8,128,1,fp8,fp8,0,0.18926932414372763
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,16,128,1,fp8,fp8,0,0.1074720025062561
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,1,128,1,fp8,fp8,0,0.08567999800046285
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,16,128,1,float16,fp8,0,0.11530666550000508
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,2,128,1,float16,fp8,0,0.09845866759618123
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,2,128,1,float16,float16,0,0.09919466574986775
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,2,128,1,fp8,fp8,0,0.08692266543706258
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,4,128,1,fp8,fp8,0,0.09787733356157939
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,4,128,1,float16,float16,0,0.10299733281135559
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,4,128,1,float16,fp8,0,0.10214400291442871
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,8,128,1,float16,float16,0,0.10643200079600017
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,8,128,1,float16,fp8,0,0.1050879955291748
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,1,128,1,float16,fp8,0,0.05440000196297964
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,1,128,1,float16,float16,0,0.05489600201447805
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,16,128,1,float16,float16,0,0.06808533271153767
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,16,128,1,float16,fp8,0,0.0660693347454071
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,1,128,1,fp8,fp8,0,0.04794133206208547
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,16,8,128,1,fp8,fp8,0,0.10091200470924377
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,16,128,1,fp8,fp8,0,0.06029866635799408
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,2,128,1,float16,float16,0,0.055439998706181846
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,2,128,1,fp8,fp8,0,0.049925332268079124
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,4,128,1,float16,fp8,0,0.058176000912984215
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,8,128,1,float16,fp8,0,0.06001066664854685
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,4,128,1,float16,float16,0,0.05885866781075796
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,8,128,1,float16,float16,0,0.060165335734685264
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,4,128,1,fp8,fp8,0,0.055546666185061135
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,2,128,1,float16,fp8,0,0.05525333185990652
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,16,128,1,float16,float16,0,0.03774933268626531
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,1,128,1,float16,float16,0,0.03328000009059906
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,1,128,1,float16,fp8,0,0.033215999603271484
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,16,128,1,fp8,fp8,0,0.03620799879233042
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,16,128,1,float16,fp8,0,0.036474667489528656
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,1,128,1,fp8,fp8,0,0.03013866643110911
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,16,8,128,1,fp8,fp8,0,0.05754133562246958
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,2,128,1,float16,float16,0,0.033674667278925575
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,2,128,1,float16,fp8,0,0.033973333736260734
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,4,128,1,float16,float16,0,0.03430933256944021
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,4,128,1,fp8,fp8,0,0.033146666983763375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,4,128,1,float16,fp8,0,0.03499733408292135
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,8,128,1,float16,float16,0,0.035605333745479584
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,2,128,1,fp8,fp8,0,0.031013332307338715
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,8,128,1,float16,fp8,0,0.0360000009338061
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,16,8,128,1,fp8,fp8,0,0.033471999069054924
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,16,128,1,float16,fp8,0,0.024357333779335022
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,16,128,1,float16,float16,0,0.02422400067249934
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,1,128,1,float16,fp8,0,0.022629333039124806
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,1,128,1,float16,float16,0,0.022602667411168415
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,16,128,1,fp8,fp8,0,0.022976001103719074
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,1,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,2,128,1,float16,fp8,0,0.02311466634273529
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,2,128,1,fp8,fp8,0,0.021562665700912476
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,4,128,1,float16,fp8,0,0.02364266663789749
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,2,128,1,float16,float16,0,0.02266666789849599
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,4,128,1,float16,float16,0,0.02370133250951767
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,4,128,1,fp8,fp8,0,0.02274133265018463
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,8,128,1,float16,float16,0,0.02369066576162974
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,8,128,1,float16,fp8,0,0.024106666445732117
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,16,128,1,float16,float16,0,0.020474666108687718
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,16,128,1,float16,fp8,0,0.02035733312368393
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,16,8,128,1,fp8,fp8,0,0.02271466702222824
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,1,128,1,float16,float16,0,0.01939733326435089
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,1,128,1,float16,fp8,0,0.01953599974513054
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,16,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,1,128,1,fp8,fp8,0,0.01830400029818217
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,2,128,1,float16,float16,0,0.019306667149066925
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,2,128,1,float16,fp8,0,0.01956266661485036
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,2,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,4,128,1,float16,float16,0,0.019354666272799175
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,4,128,1,float16,fp8,0,0.019968000551064808
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,4,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,8,128,1,float16,fp8,0,0.019839999576409657
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,16,128,1,float16,float16,0,0.018559999763965607
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,8,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,16,128,1,float16,fp8,0,0.018735999862353008
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,16,8,128,1,float16,float16,0,0.019600000232458115
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,16,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,1,128,1,float16,fp8,0,0.018325333793958027
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,1,128,1,float16,float16,0,0.017946666727463405
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,1,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,2,128,1,float16,float16,0,0.01821333294113477
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,2,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,2,128,1,float16,fp8,0,0.018229333062966663
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,4,128,1,fp8,fp8,0,0.01747200017174085
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,8,128,1,float16,float16,0,0.01830400029818217
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,4,128,1,float16,float16,0,0.018245333184798557
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,8,128,1,fp8,fp8,0,0.017610666652520496
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,8,128,1,float16,fp8,0,0.018629333625237148
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,16,4,128,1,float16,fp8,0,0.018309333672126133
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,1,128,1,float16,float16,0,0.3086079955101013
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,1,128,1,float16,fp8,0,0.30805333455403644
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,1,128,1,fp8,fp8,0,0.29029866059621173
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,2,128,1,fp8,fp8,0,0.3239519993464152
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,2,128,1,float16,float16,0,0.31616532802581787
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,2,128,1,float16,fp8,0,0.3141813278198242
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,4,128,1,float16,float16,0,0.3352160056432088
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,4,128,1,float16,fp8,0,0.3295519948005676
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,1,128,1,float16,float16,0,0.1641973356405894
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,4,128,1,fp8,fp8,0,0.36486931641896564
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,16,128,1,float16,float16,0,0.21121066808700562
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,8,128,1,float16,fp8,0,0.3364959955215454
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,8,128,1,float16,float16,0,0.34038933118184406
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,16,128,1,fp8,fp8,0,0.19158399105072021
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,16,8,128,1,fp8,fp8,0,0.36830933888753253
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,1,128,1,float16,fp8,0,0.16421332955360413
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,16,128,1,float16,fp8,0,0.20384534200032553
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,1,128,1,fp8,fp8,0,0.15318933129310608
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,2,128,1,float16,float16,0,0.16927466789881387
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,2,128,1,float16,fp8,0,0.16779732704162598
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,2,128,1,fp8,fp8,0,0.1637226641178131
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,4,128,1,float16,float16,0,0.1774079998334249
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,4,128,1,float16,fp8,0,0.17481066783269247
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,4,128,1,fp8,fp8,0,0.1852160096168518
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,8,128,1,float16,fp8,0,0.17729065815607706
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,8,128,1,float16,float16,0,0.18121600151062012
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,16,128,1,float16,float16,0,0.11926933129628499
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,1,128,1,float16,float16,0,0.09274133046468098
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,16,8,128,1,fp8,fp8,0,0.19101866086324057
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,1,128,1,float16,fp8,0,0.09241599837938945
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,16,128,1,float16,fp8,0,0.1176639993985494
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,16,128,1,fp8,fp8,0,0.10430399576822917
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,1,128,1,fp8,fp8,0,0.08520000179608662
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,2,128,1,float16,fp8,0,0.09541866183280945
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,2,128,1,fp8,fp8,0,0.08683199683825175
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,2,128,1,float16,float16,0,0.09546132882436116
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,4,128,1,fp8,fp8,0,0.09851732850074768
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,4,128,1,float16,float16,0,0.10047466556231181
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,4,128,1,float16,fp8,0,0.09983467062314351
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,8,128,1,float16,float16,0,0.10300800204277039
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,8,128,1,float16,fp8,0,0.10105066498120625
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,16,128,1,float16,float16,0,0.06477866570154826
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,1,128,1,float16,fp8,0,0.052042668064435325
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,1,128,1,float16,float16,0,0.05171200136343638
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,16,8,128,1,fp8,fp8,0,0.10060800115267436
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,16,128,1,fp8,fp8,0,0.05948266883691152
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,16,128,1,float16,fp8,0,0.06294933458169301
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,1,128,1,fp8,fp8,0,0.04789866507053375
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,2,128,1,float16,float16,0,0.05268799761931101
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,2,128,1,float16,fp8,0,0.05282666782538096
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,4,128,1,float16,float16,0,0.055445333321889244
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,2,128,1,fp8,fp8,0,0.04987200101216634
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,4,128,1,float16,fp8,0,0.055546666185061135
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,4,128,1,fp8,fp8,0,0.055685331424077354
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,16,128,1,float16,float16,0,0.03544000039498011
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,8,128,1,float16,fp8,0,0.05700799822807312
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,8,128,1,float16,float16,0,0.057301332553227745
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,1,128,1,float16,float16,0,0.031685332457224526
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,16,8,128,1,fp8,fp8,0,0.05634133517742157
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,16,128,1,float16,fp8,0,0.035391998787721
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,16,128,1,fp8,fp8,0,0.03513599932193756
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,1,128,1,fp8,fp8,0,0.03032533327738444
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,1,128,1,float16,fp8,0,0.031770666440327965
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,2,128,1,float16,float16,0,0.03186666717131933
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,2,128,1,fp8,fp8,0,0.030960001051425934
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,4,128,1,float16,float16,0,0.03348266581694285
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,4,128,1,float16,fp8,0,0.03330666571855545
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,2,128,1,float16,fp8,0,0.032138665517171226
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,8,128,1,float16,float16,0,0.03310399999221166
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,4,128,1,fp8,fp8,0,0.03264000018437704
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,8,128,1,fp8,fp8,0,0.03322133421897888
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,16,128,1,float16,fp8,0,0.023610666394233704
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,16,128,1,float16,float16,0,0.02327466756105423
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,16,128,1,fp8,fp8,0,0.02277333289384842
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,16,8,128,1,float16,fp8,0,0.03382933388153712
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,1,128,1,float16,fp8,0,0.021989333132902782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,1,128,1,float16,float16,0,0.021701333423455555
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,2,128,1,float16,fp8,0,0.02216000109910965
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,2,128,1,float16,float16,0,0.022005334496498108
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,1,128,1,fp8,fp8,0,0.021018666525681812
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,4,128,1,float16,float16,0,0.022463999688625336
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,2,128,1,fp8,fp8,0,0.02163200080394745
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,4,128,1,fp8,fp8,0,0.022613334159056347
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,8,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,4,128,1,float16,fp8,0,0.02329600105683009
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,8,128,1,fp8,fp8,0,0.02271466702222824
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,16,128,1,float16,fp8,0,0.018533332894245785
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,16,8,128,1,float16,float16,0,0.022970666488011677
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,16,128,1,float16,float16,0,0.01870399961868922
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,16,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,1,128,1,float16,fp8,0,0.018325333793958027
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,1,128,1,float16,float16,0,0.0180479995906353
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,1,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,2,128,1,float16,float16,0,0.018101333330074947
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,2,128,1,float16,fp8,0,0.01803733284274737
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,4,128,1,float16,float16,0,0.018229333062966663
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,4,128,1,float16,fp8,0,0.01846933364868164
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,2,128,1,fp8,fp8,0,0.018357332795858383
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,4,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,8,128,1,float16,float16,0,0.01852799952030182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,8,128,1,float16,fp8,0,0.018837332725524902
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,16,128,1,float16,float16,0,0.016976000120242436
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,16,8,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,16,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,16,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,1,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,1,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,2,128,1,float16,float16,0,0.01670933390657107
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,1,128,1,float16,float16,0,0.016704000532627106
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,2,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,4,128,1,float16,fp8,0,0.01748266691962878
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,2,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,4,128,1,float16,float16,0,0.016735999534527462
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,4,128,1,fp8,fp8,0,0.0174346665541331
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,8,128,1,float16,float16,0,0.016895999511082966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,8,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,16,128,1,float16,fp8,0,0.01653333380818367
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,16,128,1,float16,float16,0,0.01623999948302905
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,16,128,1,fp8,fp8,0,0.01646399994691213
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,1,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,16,8,128,1,fp8,fp8,0,0.017535999417304993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,1,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,1,128,1,float16,float16,0,0.01609066625436147
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,2,128,1,float16,float16,0,0.016255999604860943
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,2,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,4,128,1,float16,float16,0,0.016186666985352833
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,2,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,4,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,8,128,1,float16,float16,0,0.0163680004576842
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,4,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,16,8,128,1,float16,fp8,0,0.01674666628241539
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,1,128,1,float16,float16,0,0.1938719948132833
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,1,128,1,float16,fp8,0,0.1946720083554586
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,1,128,1,fp8,fp8,0,0.20335467656453451
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,2,128,1,float16,float16,0,0.19985600312550864
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,2,128,1,float16,fp8,0,0.1978879968325297
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,2,128,1,fp8,fp8,0,0.21584532658259073
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,4,128,1,float16,float16,0,0.20785599946975708
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,4,128,1,float16,fp8,0,0.20498667160669962
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,4,128,1,fp8,fp8,0,0.237445334593455
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,8,128,1,float16,float16,0,0.21388800938924155
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,8,128,1,float16,fp8,0,0.20948266983032227
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,16,128,1,float16,float16,0,0.12101333340009053
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,16,128,1,float16,fp8,0,0.11723732948303223
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,16,8,128,1,fp8,fp8,0,0.24117867151896158
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,1,128,1,float16,float16,0,0.10634666681289673
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,1,128,1,float16,fp8,0,0.10628267129262288
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,1,128,1,fp8,fp8,0,0.1127359966437022
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,16,128,1,fp8,fp8,0,0.13039466738700867
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,2,128,1,float16,float16,0,0.1092746655146281
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,2,128,1,float16,fp8,0,0.10872000455856323
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,2,128,1,fp8,fp8,0,0.11459733049074809
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,4,128,1,float16,float16,0,0.11508799592653911
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,4,128,1,fp8,fp8,0,0.1253919998804728
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,4,128,1,float16,fp8,0,0.11361066500345866
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,16,128,1,float16,float16,0,0.06682666639486949
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,1,128,1,float16,float16,0,0.059392000238100685
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,8,128,1,float16,float16,0,0.11843199531237285
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,8,128,1,float16,fp8,0,0.1162453293800354
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,16,128,1,float16,fp8,0,0.06497066716353099
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,16,8,128,1,fp8,fp8,0,0.1267626682917277
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,16,128,1,fp8,fp8,0,0.07253333429495494
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,1,128,1,float16,fp8,0,0.05895466605822245
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,1,128,1,fp8,fp8,0,0.06232533355553945
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,2,128,1,float16,float16,0,0.06057066718737284
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,2,128,1,float16,fp8,0,0.06039466460545858
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,4,128,1,float16,fp8,0,0.06297066807746887
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,4,128,1,fp8,fp8,0,0.0697813332080841
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,4,128,1,float16,float16,0,0.06313066681226094
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,2,128,1,fp8,fp8,0,0.06495466828346252
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,8,128,1,float16,float16,0,0.06351466476917267
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,8,128,1,float16,fp8,0,0.06440000236034393
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,16,8,128,1,fp8,fp8,0,0.0703413337469101
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,16,128,1,float16,float16,0,0.03679466744263967
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,16,128,1,float16,fp8,0,0.03577066709597906
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,1,128,1,float16,float16,0,0.03528533379236857
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,16,128,1,fp8,fp8,0,0.04254400233427683
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,1,128,1,float16,fp8,0,0.03527999917666117
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,1,128,1,fp8,fp8,0,0.03714133302370707
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,2,128,1,float16,float16,0,0.03573866685231527
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,2,128,1,fp8,fp8,0,0.037845333417256675
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,2,128,1,float16,fp8,0,0.03581333408753077
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,4,128,1,float16,fp8,0,0.03702399879693985
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,4,128,1,float16,float16,0,0.036714665591716766
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,4,128,1,fp8,fp8,0,0.039674667020638786
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,16,128,1,float16,float16,0,0.02401600033044815
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,8,128,1,fp8,fp8,0,0.040149333576361336
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,1,128,1,float16,fp8,0,0.023183998962243397
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,16,128,1,float16,fp8,0,0.02425066630045573
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,16,128,1,fp8,fp8,0,0.02646933247645696
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,8,128,1,float16,fp8,0,0.036661334335803986
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,1,128,1,float16,float16,0,0.023018665611743927
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,16,8,128,1,float16,float16,0,0.03685333331425985
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,2,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,1,128,1,fp8,fp8,0,0.024735999604066212
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,2,128,1,float16,float16,0,0.023605334262053173
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,2,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,4,128,1,float16,float16,0,0.02405333270629247
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,4,128,1,float16,fp8,0,0.024266667664051056
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,4,128,1,fp8,fp8,0,0.025888000925381977
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,8,128,1,float16,float16,0,0.024010665714740753
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,16,128,1,float16,float16,0,0.018218666315078735
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,8,128,1,fp8,fp8,0,0.026047999660174053
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,16,8,128,1,float16,fp8,0,0.024165332317352295
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,1,128,1,float16,float16,0,0.01721599946419398
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,16,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,1,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,16,128,1,float16,fp8,0,0.018346666047970455
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,1,128,1,fp8,fp8,0,0.01806933308641116
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,2,128,1,float16,float16,0,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,2,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,4,128,1,float16,float16,0,0.017423999806245167
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,2,128,1,fp8,fp8,0,0.01828266680240631
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,4,128,1,float16,fp8,0,0.017877332866191864
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,4,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,8,128,1,float16,float16,0,0.017594666530688603
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,8,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,16,8,128,1,float16,fp8,0,0.01786133274435997
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,16,128,1,fp8,fp8,0,0.017509333789348602
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,16,128,1,float16,float16,0,0.015706667055686314
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,16,128,1,float16,fp8,0,0.0161013330022494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,1,128,1,float16,float16,0,0.015658666690190632
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,1,128,1,float16,fp8,0,0.016010666886965435
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,2,128,1,float16,float16,0,0.015696000307798386
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,1,128,1,fp8,fp8,0,0.016762666404247284
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,4,128,1,float16,float16,0,0.015599999576807022
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,2,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,2,128,1,float16,fp8,0,0.016165333489576977
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,4,128,1,float16,fp8,0,0.016229332735141117
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,4,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,8,128,1,float16,float16,0,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,8,128,1,float16,fp8,0,0.016501333564519882
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,16,8,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,16,128,1,float16,float16,0,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,16,128,1,float16,fp8,0,0.015498666713635126
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,16,128,1,fp8,fp8,0,0.01613866661985715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,1,128,1,float16,fp8,0,0.015781333049138386
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,1,128,1,fp8,fp8,0,0.016282666474580765
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,1,128,1,float16,float16,0,0.015178666760524115
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,2,128,1,float16,float16,0,0.01523200049996376
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,2,128,1,float16,fp8,0,0.015829333414634068
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,2,128,1,fp8,fp8,0,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,4,128,1,float16,float16,0,0.015306666493415833
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,4,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,4,128,1,float16,fp8,0,0.015893333901961643
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,8,128,1,fp8,fp8,0,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,16,128,1,float16,float16,0,0.014533333480358124
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,16,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,8,128,1,float16,float16,0,0.014922666052977243
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,16,128,1,fp8,fp8,0,0.015829333414634068
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,16,8,128,1,float16,fp8,0,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,1,128,1,float16,float16,0,0.014629332969586054
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,1,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,1,128,1,fp8,fp8,0,0.016197333733240765
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,2,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,2,128,1,float16,float16,0,0.014794666320085526
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,2,128,1,fp8,fp8,0,0.016010666886965435
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,4,128,1,fp8,fp8,0,0.01600533351302147
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,4,128,1,float16,float16,0,0.01481066644191742
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,4,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,8,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,8,128,1,float16,float16,0,0.014874666929244995
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,16,8,128,1,fp8,fp8,0,0.01603200038274129
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,1,128,1,float16,float16,0,0.13914133111635843
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,1,128,1,float16,fp8,0,0.1389173368612925
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,2,128,1,float16,float16,0,0.14272532860438028
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,1,128,1,fp8,fp8,0,0.1663093368212382
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,2,128,1,float16,fp8,0,0.14199999968210855
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,2,128,1,fp8,fp8,0,0.16866666078567505
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,4,128,1,float16,float16,0,0.1465120017528534
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,4,128,1,float16,fp8,0,0.1455893317858378
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,4,128,1,fp8,fp8,0,0.17985600233078003
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,8,128,1,float16,float16,0,0.15121600031852722
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,16,128,1,float16,float16,0,0.08213866750399272
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,8,128,1,float16,fp8,0,0.14895466963450113
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,16,128,1,float16,fp8,0,0.08012266457080841
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,16,8,128,1,fp8,fp8,0,0.17847466468811035
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,1,128,1,float16,float16,0,0.07422933479150136
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,1,128,1,float16,fp8,0,0.07625066737333934
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,16,128,1,fp8,fp8,0,0.100490669409434
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,2,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,2,128,1,fp8,fp8,0,0.09264533718427022
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,4,128,1,float16,float16,0,0.07870399951934814
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,4,128,1,float16,fp8,0,0.07910933097203572
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,2,128,1,float16,float16,0,0.075354665517807
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,1,128,1,fp8,fp8,0,0.08922132849693298
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,4,128,1,fp8,fp8,0,0.0981333355108897
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,8,128,1,float16,fp8,0,0.0807360013326009
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,8,128,1,float16,float16,0,0.081386665503184
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,16,8,128,1,fp8,fp8,0,0.09845866759618123
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,1,128,1,float16,float16,0,0.04268266757329305
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,16,128,1,float16,fp8,0,0.04357333481311798
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,16,128,1,float16,float16,0,0.044351999958356224
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,16,128,1,fp8,fp8,0,0.05620799958705902
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,1,128,1,float16,fp8,0,0.04359999795754751
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,1,128,1,fp8,fp8,0,0.05083199838797251
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,4,128,1,float16,fp8,0,0.04483200112978617
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,2,128,1,float16,float16,0,0.04359999795754751
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,2,128,1,fp8,fp8,0,0.05150400102138519
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,4,128,1,float16,float16,0,0.04483733574549357
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,2,128,1,float16,fp8,0,0.04404266675313314
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,4,128,1,fp8,fp8,0,0.05375466744105021
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,8,128,1,float16,float16,0,0.04497066636880239
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,8,128,1,float16,fp8,0,0.04447466631730398
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,1,128,1,float16,float16,0,0.027119999130566914
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,16,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,16,128,1,float16,fp8,0,0.02741866558790207
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,16,128,1,float16,float16,0,0.027424000203609467
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,1,128,1,float16,fp8,0,0.02701866626739502
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,1,128,1,fp8,fp8,0,0.03178133318821589
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,2,128,1,float16,float16,0,0.027295999228954315
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,16,8,128,1,fp8,fp8,0,0.05434666574001312
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,2,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,2,128,1,fp8,fp8,0,0.032069332897663116
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,4,128,1,fp8,fp8,0,0.03303466737270355
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,4,128,1,float16,fp8,0,0.027994667490323383
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,4,128,1,float16,float16,0,0.027989332874615986
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,8,128,1,float16,float16,0,0.028351999819278717
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,8,128,1,float16,fp8,0,0.028192001084486645
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,16,8,128,1,fp8,fp8,0,0.03323200096686681
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,16,128,1,float16,float16,0,0.019744000087181728
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,16,128,1,float16,fp8,0,0.019727999965349834
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,16,128,1,fp8,fp8,0,0.022826666633288067
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,1,128,1,float16,fp8,0,0.01878400022784869
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,1,128,1,float16,float16,0,0.0185759998857975
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,1,128,1,fp8,fp8,0,0.021695998807748158
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,2,128,1,float16,float16,0,0.018954666952292126
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,2,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,2,128,1,fp8,fp8,0,0.02195200075705846
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,4,128,1,float16,float16,0,0.019215999792019527
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,4,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,8,128,1,float16,float16,0,0.01953599974513054
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,4,128,1,fp8,fp8,0,0.022453332940737408
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,8,128,1,float16,fp8,0,0.019706666469573975
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,16,8,128,1,fp8,fp8,0,0.02350933353106181
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,16,128,1,float16,float16,0,0.01573866605758667
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,16,128,1,float16,fp8,0,0.016000000139077503
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,16,128,1,fp8,fp8,0,0.01786133274435997
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,1,128,1,float16,float16,0,0.014943999548753103
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,1,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,2,128,1,float16,float16,0,0.01504533365368843
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,2,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,1,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,4,128,1,float16,float16,0,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,4,128,1,float16,fp8,0,0.015471999843915304
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,2,128,1,float16,fp8,0,0.015439999600251516
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,4,128,1,fp8,fp8,0,0.017397332936525345
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,8,128,1,float16,float16,0,0.01565333331624667
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,8,128,1,float16,fp8,0,0.016010666886965435
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,16,8,128,1,fp8,fp8,0,0.017573333034912746
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,16,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,16,128,1,fp8,fp8,0,0.016517333686351776
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,16,128,1,float16,float16,0,0.014783999572197596
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,2,128,1,float16,float16,0,0.014490666488806406
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,1,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,1,128,1,fp8,fp8,0,0.016682667036851246
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,2,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,2,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,1,128,1,float16,float16,0,0.014511999984582266
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,4,128,1,float16,float16,0,0.014432000617186228
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,4,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,4,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,8,128,1,float16,float16,0,0.01444799949725469
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,16,128,1,float16,float16,0,0.01379199946920077
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,8,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,16,8,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,16,128,1,float16,fp8,0,0.014485333114862442
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,16,128,1,fp8,fp8,0,0.015872000406185787
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,1,128,1,float16,fp8,0,0.014671999961137772
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,2,128,1,float16,float16,0,0.013925333817799887
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,1,128,1,float16,float16,0,0.014032000054915747
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,2,128,1,float16,fp8,0,0.014869333555301031
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,4,128,1,float16,float16,0,0.014090667168299357
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,1,128,1,fp8,fp8,0,0.01607999950647354
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,2,128,1,fp8,fp8,0,0.016127999871969223
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,4,128,1,float16,fp8,0,0.014752000570297241
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,4,128,1,fp8,fp8,0,0.016250666230916977
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,8,128,1,float16,float16,0,0.013904000322024027
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,8,128,1,fp8,fp8,0,0.016336000214020412
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,16,8,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,16,128,1,float16,float16,0,0.013562666873137156
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,16,128,1,float16,fp8,0,0.014117332796255747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,16,128,1,fp8,fp8,0,0.01584533353646596
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,1,128,1,float16,float16,0,0.013807999591032663
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,1,128,1,fp8,fp8,0,0.01586666703224182
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,1,128,1,float16,fp8,0,0.014544000228246054
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,2,128,1,float16,float16,0,0.013989333063364029
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,2,128,1,fp8,fp8,0,0.016154666741689045
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,2,128,1,float16,fp8,0,0.014581333845853806
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,4,128,1,float16,float16,0,0.013882666826248169
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,4,128,1,float16,fp8,0,0.01471466695268949
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,4,128,1,fp8,fp8,0,0.015941333025693893
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,8,128,1,float16,float16,0,0.013823999712864557
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,8,128,1,fp8,fp8,0,0.016058667252461117
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,16,8,128,1,float16,fp8,0,0.01469333345691363
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,1,128,1,float16,float16,0,0.11794666449228923
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,1,128,1,float16,fp8,0,0.11889599760373433
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,2,128,1,float16,fp8,0,0.11958400408426921
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,2,128,1,float16,float16,0,0.11913067102432251
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,1,128,1,fp8,fp8,0,0.1411146620909373
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,2,128,1,fp8,fp8,0,0.14362667004267374
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,4,128,1,float16,float16,0,0.12231466174125671
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,4,128,1,float16,fp8,0,0.12133333086967468
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,4,128,1,fp8,fp8,0,0.14967999855677286
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,8,128,1,float16,float16,0,0.12414933244387309
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,8,128,1,fp8,fp8,0,0.15009066462516785
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,16,8,128,1,float16,fp8,0,0.12324266632397969
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,16,128,1,float16,fp8,0,0.0602453351020813
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,16,128,1,fp8,fp8,0,0.08233066896597545
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,16,128,1,float16,float16,0,0.06132799883683523
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,1,128,1,float16,float16,0,0.06499733527501424
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,1,128,1,float16,fp8,0,0.06522666911284129
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,2,128,1,float16,float16,0,0.06628266473611195
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,1,128,1,fp8,fp8,0,0.0786293347676595
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,2,128,1,float16,fp8,0,0.06537599861621857
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,4,128,1,float16,float16,0,0.06649599969387054
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,2,128,1,fp8,fp8,0,0.07923200229803722
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,4,128,1,fp8,fp8,0,0.08110400040944417
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,4,128,1,float16,fp8,0,0.06620799998442332
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,8,128,1,float16,float16,0,0.06685866912206014
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,8,128,1,float16,fp8,0,0.06744533280531566
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,16,128,1,float16,float16,0,0.03576533248027166
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,16,128,1,float16,fp8,0,0.03588266670703888
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,16,128,1,fp8,fp8,0,0.04674666623274485
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,1,128,1,float16,float16,0,0.03856533269087473
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,1,128,1,fp8,fp8,0,0.045850664377212524
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,16,8,128,1,fp8,fp8,0,0.08264533181985219
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,1,128,1,float16,fp8,0,0.038346665600935616
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,2,128,1,float16,float16,0,0.03892799963553747
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,2,128,1,float16,fp8,0,0.03908266623814901
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,2,128,1,fp8,fp8,0,0.04602666695912679
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,4,128,1,float16,fp8,0,0.03945599993069967
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,4,128,1,float16,float16,0,0.038922667503356934
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,4,128,1,fp8,fp8,0,0.0473333348830541
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,16,128,1,float16,float16,0,0.023941333095232647
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,8,128,1,fp8,fp8,0,0.04734399914741516
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,8,128,1,float16,fp8,0,0.039488000174363456
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,16,8,128,1,float16,float16,0,0.039477333426475525
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,16,128,1,float16,fp8,0,0.023973333338896435
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,1,128,1,float16,float16,0,0.024725332856178284
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,1,128,1,float16,fp8,0,0.024698667228221893
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,16,128,1,fp8,fp8,0,0.029738667110602062
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,1,128,1,fp8,fp8,0,0.028938665986061096
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,2,128,1,float16,float16,0,0.025013332565625507
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,4,128,1,float16,float16,0,0.024911999702453613
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,2,128,1,float16,fp8,0,0.024879999458789825
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,4,128,1,float16,fp8,0,0.025216000775496166
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,4,128,1,fp8,fp8,0,0.029738667110602062
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,2,128,1,fp8,fp8,0,0.029333333174387615
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,8,128,1,float16,float16,0,0.02521066615978877
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,8,128,1,fp8,fp8,0,0.030026666820049286
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,16,8,128,1,float16,fp8,0,0.025386666258176167
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,16,128,1,float16,float16,0,0.018053332964579265
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,1,128,1,float16,float16,0,0.017535999417304993
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,16,128,1,float16,fp8,0,0.018133333573738735
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,16,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,1,128,1,fp8,fp8,0,0.02109333376089732
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,1,128,1,float16,fp8,0,0.017850667238235474
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,2,128,1,float16,fp8,0,0.018250666558742523
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,2,128,1,float16,float16,0,0.01775466650724411
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,2,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,4,128,1,float16,float16,0,0.017893332988023758
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,4,128,1,float16,fp8,0,0.018245333184798557
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,8,128,1,float16,float16,0,0.018079999834299088
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,4,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,8,128,1,float16,fp8,0,0.01844800015290578
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,16,8,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,16,128,1,float16,float16,0,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,16,128,1,float16,fp8,0,0.015466666469971338
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,16,128,1,fp8,fp8,0,0.017349333812793095
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,1,128,1,float16,fp8,0,0.015568000574906668
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,1,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,1,128,1,float16,float16,0,0.015040000279744467
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,2,128,1,float16,float16,0,0.01498666654030482
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,2,128,1,float16,fp8,0,0.015471999843915304
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,2,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,4,128,1,float16,float16,0,0.015210667004187902
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,4,128,1,float16,fp8,0,0.015397333850463232
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,4,128,1,fp8,fp8,0,0.01764800027012825
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,8,128,1,float16,float16,0,0.014954666296641031
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,16,128,1,float16,float16,0,0.014309333016475042
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,8,128,1,fp8,fp8,0,0.01782400036851565
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,16,128,1,float16,fp8,0,0.014720000326633453
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,16,128,1,fp8,fp8,0,0.01632000009218852
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,1,128,1,float16,float16,0,0.014159999787807465
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,16,8,128,1,float16,fp8,0,0.015557333827018738
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,1,128,1,float16,fp8,0,0.014682666709025701
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,1,128,1,fp8,fp8,0,0.01628799984852473
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,2,128,1,float16,float16,0,0.014127999544143677
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,2,128,1,fp8,fp8,0,0.01657066618402799
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,4,128,1,float16,float16,0,0.014202666779359182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,2,128,1,float16,fp8,0,0.014789332946141561
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,4,128,1,float16,fp8,0,0.01469333345691363
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,8,128,1,float16,float16,0,0.014197333405415217
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,8,128,1,float16,fp8,0,0.014607999473810196
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,4,128,1,fp8,fp8,0,0.016549333930015564
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,16,8,128,1,fp8,fp8,0,0.016597333053747814
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,16,128,1,float16,float16,0,0.013904000322024027
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,16,128,1,float16,fp8,0,0.014368000129858652
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,1,128,1,float16,float16,0,0.01403733342885971
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,1,128,1,float16,fp8,0,0.01462399959564209
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,16,128,1,fp8,fp8,0,0.01598400001724561
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,1,128,1,fp8,fp8,0,0.01598400001724561
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,2,128,1,float16,float16,0,0.013946666071812311
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,2,128,1,fp8,fp8,0,0.016154666741689045
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,2,128,1,float16,fp8,0,0.01463466634353002
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,4,128,1,float16,float16,0,0.013962666193644205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,4,128,1,float16,fp8,0,0.014730667074521383
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,4,128,1,fp8,fp8,0,0.016010666886965435
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,8,128,1,float16,float16,0,0.013973332941532135
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,8,128,1,float16,fp8,0,0.014602666099866232
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,16,8,128,1,fp8,fp8,0,0.01603200038274129
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,16,128,1,float16,float16,0,0.013663999736309052
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,16,128,1,fp8,fp8,0,0.015781333049138386
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,16,128,1,float16,fp8,0,0.014122666170199713
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,1,128,1,float16,float16,0,0.013872000078360239
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,1,128,1,float16,fp8,0,0.01443733274936676
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,2,128,1,float16,float16,0,0.013797332843144735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,1,128,1,fp8,fp8,0,0.015919999529918034
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,2,128,1,float16,fp8,0,0.014309333016475042
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,2,128,1,fp8,fp8,0,0.01595199977358182
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,4,128,1,float16,fp8,0,0.014479999740918478
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,8,128,1,float16,float16,0,0.013738666971524557
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,4,128,1,float16,float16,0,0.01393066719174385
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,4,128,1,fp8,fp8,0,0.015978666643301647
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,8,128,1,float16,fp8,0,0.014469332993030548
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,16,8,128,1,fp8,fp8,0,0.016000000139077503
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,1,128,1,float16,float16,0,12.434672037760416
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,1,128,1,fp8,fp8,0,7.911477406819661
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,1,128,1,float16,fp8,0,12.45697021484375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,2,128,1,fp8,fp8,0,7.8743896484375
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,2,128,1,float16,float16,0,12.445509592692057
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,2,128,1,float16,fp8,0,12.475509643554688
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,4,128,1,float16,float16,0,12.666197458902994
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,4,128,1,float16,fp8,0,12.438031514485678
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,1,128,1,float16,fp8,0,6.225162506103516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,1,128,1,float16,float16,0,6.157978693644206
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,1,128,1,fp8,fp8,0,3.965674718221029
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,12,128,1,float16,float16,0,5.848202387491862
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,12,4,128,1,fp8,fp8,0,8.060288111368815
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,2,128,1,float16,float16,0,6.198709487915039
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,12,128,1,float16,fp8,0,5.926928202311198
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,12,128,1,fp8,fp8,0,3.9603360493977866
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,2,128,1,float16,fp8,0,5.901962916056315
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,2,128,1,fp8,fp8,0,3.967754681905111
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,4,128,1,float16,float16,0,6.296394983927409
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,1,128,1,float16,float16,0,3.0293760299682617
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,4,128,1,float16,fp8,0,6.082181294759114
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,12,128,1,float16,fp8,0,3.020554542541504
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,12,128,1,float16,float16,0,2.9284693400065103
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,1,128,1,float16,fp8,0,3.0595146814982095
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,12,128,1,fp8,fp8,0,1.9911573727925618
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,12,4,128,1,fp8,fp8,0,4.034874598185222
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,1,128,1,fp8,fp8,0,2.0099147160847983
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,2,128,1,float16,float16,0,2.9973227183024087
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,2,128,1,float16,fp8,0,2.9606399536132812
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,2,128,1,fp8,fp8,0,2.0240426063537598
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,12,128,1,float16,float16,0,1.5793654123942058
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,4,128,1,fp8,fp8,0,2.0265653928120932
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,4,128,1,float16,float16,0,3.0729331970214844
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,12,4,128,1,float16,fp8,0,3.025994618733724
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,1,128,1,float16,float16,0,1.6016106605529785
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,1,128,1,float16,fp8,0,1.6082879702250164
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,12,128,1,float16,fp8,0,1.560538609822591
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,1,128,1,fp8,fp8,0,1.0299946467081706
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,12,128,1,fp8,fp8,0,1.0149493217468262
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,2,128,1,float16,fp8,0,1.596816062927246
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,2,128,1,float16,float16,0,1.613104025522868
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,2,128,1,fp8,fp8,0,1.0340320269266765
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,4,128,1,float16,float16,0,1.60316801071167
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,4,128,1,float16,fp8,0,1.6149546305338542
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,12,4,128,1,fp8,fp8,0,1.037274678548177
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,1,128,1,float16,float16,0,7.082629521687825
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,1,128,1,float16,fp8,0,7.146154403686523
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,1,128,1,fp8,fp8,0,4.599482536315918
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,2,128,1,float16,float16,0,7.097690582275391
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,2,128,1,float16,fp8,0,6.926490783691406
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,2,128,1,fp8,fp8,0,4.624575932820638
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,4,128,1,float16,float16,0,7.183829625447591
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,4,128,1,float16,fp8,0,7.113360087076823
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,1,128,1,float16,float16,0,3.435994784037272
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,1,128,1,float16,fp8,0,3.4810028076171875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,1,128,1,fp8,fp8,0,2.3100694020589194
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,12,128,1,float16,fp8,0,3.3268585205078125
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,12,4,128,1,fp8,fp8,0,4.71888001759847
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,12,128,1,float16,float16,0,3.3354291915893555
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,12,128,1,fp8,fp8,0,2.300960063934326
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,2,128,1,float16,float16,0,3.4557278951009116
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,2,128,1,float16,fp8,0,3.5035788218180337
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,2,128,1,fp8,fp8,0,2.292719999949137
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,1,128,1,float16,fp8,0,1.7538399696350098
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,1,128,1,float16,float16,0,1.7961866060892742
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,4,128,1,float16,float16,0,3.575434684753418
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,4,128,1,fp8,fp8,0,2.3261706034342446
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,12,128,1,float16,float16,0,1.743018627166748
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,12,4,128,1,float16,fp8,0,3.3852052688598633
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,12,128,1,float16,fp8,0,1.744165261586507
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,12,128,1,fp8,fp8,0,1.2104907035827637
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,1,128,1,fp8,fp8,0,1.2135732968648274
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,2,128,1,fp8,fp8,0,1.2211360136667888
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,2,128,1,float16,float16,0,1.7997493743896484
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,2,128,1,float16,fp8,0,1.7835253079732258
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,4,128,1,float16,float16,0,1.7869280179341633
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,4,128,1,float16,fp8,0,1.773263931274414
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,12,4,128,1,fp8,fp8,0,1.2301653226216633
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,12,128,1,float16,float16,0,0.9543733596801758
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,1,128,1,float16,fp8,0,0.9750986893971761
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,1,128,1,float16,float16,0,0.9846773147583008
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,12,128,1,float16,fp8,0,0.9436319669087728
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,1,128,1,fp8,fp8,0,0.6275039911270142
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,2,128,1,float16,float16,0,0.9790559609731039
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,2,128,1,float16,fp8,0,0.9749120076497396
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,2,128,1,fp8,fp8,0,0.6285920143127441
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,12,128,1,fp8,fp8,0,0.6222560008366903
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,4,128,1,float16,float16,0,0.9876533349355062
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,4,128,1,float16,fp8,0,0.9820799827575684
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,12,4,128,1,fp8,fp8,0,0.632207989692688
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,1,128,1,float16,float16,0,5.095354715983073
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,1,128,1,fp8,fp8,0,3.2163254419962564
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,1,128,1,float16,fp8,0,4.756794611612956
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,2,128,1,float16,float16,0,5.017754554748535
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,2,128,1,float16,fp8,0,4.947056134541829
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,2,128,1,fp8,fp8,0,3.2415040334065757
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,4,128,1,float16,fp8,0,4.761930783589681
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,4,128,1,float16,float16,0,5.07912540435791
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,1,128,1,float16,float16,0,2.450064023335775
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,1,128,1,float16,fp8,0,2.4330080350240073
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,12,128,1,float16,fp8,0,2.390437285105387
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,12,128,1,float16,float16,0,2.451669375101725
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,1,128,1,fp8,fp8,0,1.6576639811197917
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,12,128,1,fp8,fp8,0,1.6800586382548015
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,12,4,128,1,fp8,fp8,0,3.248533248901367
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,2,128,1,float16,float16,0,2.4645973841349282
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,2,128,1,float16,fp8,0,2.435632069905599
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,2,128,1,fp8,fp8,0,1.6748266220092773
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,1,128,1,float16,float16,0,1.2950879732767742
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,4,128,1,float16,float16,0,2.508853276570638
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,4,128,1,float16,fp8,0,2.4372960726420083
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,12,4,128,1,fp8,fp8,0,1.6792319615681965
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,12,128,1,float16,fp8,0,1.2594506740570068
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,12,128,1,float16,float16,0,1.2692426840464275
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,1,128,1,float16,fp8,0,1.2841119766235352
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,12,128,1,fp8,fp8,0,0.874661366144816
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,1,128,1,fp8,fp8,0,0.8494666417439779
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,2,128,1,float16,float16,0,1.2880799770355225
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,2,128,1,float16,fp8,0,1.2828853130340576
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,2,128,1,fp8,fp8,0,0.8500266869862875
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,4,128,1,float16,float16,0,1.2935893535614014
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,4,128,1,fp8,fp8,0,0.8681440353393555
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,12,128,1,float16,float16,0,0.6402346690495809
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,12,4,128,1,float16,fp8,0,1.2854560216267903
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,12,128,1,float16,fp8,0,0.6354346672693888
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,1,128,1,float16,fp8,0,0.6291040182113647
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,1,128,1,float16,float16,0,0.6410080194473267
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,1,128,1,fp8,fp8,0,0.46371201674143475
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,2,128,1,float16,float16,0,0.6411093473434448
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,12,128,1,fp8,fp8,0,0.45877333482106525
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,2,128,1,float16,fp8,0,0.6339146693547567
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,2,128,1,fp8,fp8,0,0.46315733591715497
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,4,128,1,float16,float16,0,0.6436640024185181
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,4,128,1,float16,fp8,0,0.6340853373209635
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,12,4,128,1,fp8,fp8,0,0.4696693420410156
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,1,128,1,float16,float16,0,6.63693364461263
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,1,128,1,float16,fp8,0,6.662517547607422
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,1,128,1,fp8,fp8,0,4.363295873006185
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,2,128,1,fp8,fp8,0,4.347493489583333
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,2,128,1,float16,float16,0,6.489925384521484
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,2,128,1,float16,fp8,0,6.754010518391927
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,4,128,1,float16,float16,0,6.625781377156575
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,4,128,1,float16,fp8,0,6.398960113525391
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,1,128,1,float16,float16,0,3.196336110432943
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,1,128,1,float16,fp8,0,3.179925282796224
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,1,128,1,fp8,fp8,0,2.1708319981892905
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,2,128,1,float16,float16,0,3.1774400075276694
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,12,128,1,float16,float16,0,3.1654933293660483
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,12,128,1,float16,fp8,0,3.1159146626790366
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,12,128,1,fp8,fp8,0,2.2246932983398438
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,12,4,128,1,fp8,fp8,0,4.425482749938965
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,2,128,1,float16,fp8,0,3.172266642252604
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,2,128,1,fp8,fp8,0,2.184112071990967
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,1,128,1,float16,float16,0,1.6453280448913574
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,4,128,1,float16,float16,0,3.2242453893025718
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,1,128,1,float16,fp8,0,1.6176692644755046
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,4,128,1,float16,fp8,0,3.163882573445638
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,12,128,1,float16,float16,0,1.616714636484782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,12,4,128,1,fp8,fp8,0,2.2166879971822104
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,12,128,1,float16,fp8,0,1.6288053194681804
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,12,128,1,fp8,fp8,0,1.1522826353708904
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,1,128,1,fp8,fp8,0,1.1377066771189372
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,2,128,1,float16,float16,0,1.625696023305257
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,2,128,1,float16,fp8,0,1.6233439445495605
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,2,128,1,fp8,fp8,0,1.1433333555857341
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,4,128,1,float16,float16,0,1.6411627133687336
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,4,128,1,float16,fp8,0,1.6431573232014973
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,12,128,1,float16,float16,0,0.8622346719106039
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,1,128,1,float16,float16,0,0.8758560021718343
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,12,4,128,1,fp8,fp8,0,1.1530133088429768
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,1,128,1,float16,fp8,0,0.8670132954915365
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,1,128,1,fp8,fp8,0,0.5827946662902832
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,12,128,1,float16,fp8,0,0.8677866458892822
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,12,128,1,fp8,fp8,0,0.5924373467763265
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,2,128,1,float16,float16,0,0.8831093311309814
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,2,128,1,float16,fp8,0,0.8699093659718832
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,2,128,1,fp8,fp8,0,0.5839626789093018
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,4,128,1,float16,float16,0,0.8848693370819092
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,4,128,1,float16,fp8,0,0.8777813116709391
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,12,128,1,float16,fp8,0,0.42626134554545086
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,12,4,128,1,fp8,fp8,0,0.5897066593170166
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,12,128,1,float16,float16,0,0.43594133853912354
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,1,128,1,float16,float16,0,0.4329013427098592
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,1,128,1,float16,fp8,0,0.4273333152135213
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,12,128,1,fp8,fp8,0,0.32550400495529175
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,1,128,1,fp8,fp8,0,0.32209599018096924
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,2,128,1,float16,float16,0,0.43752535184224445
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,2,128,1,float16,fp8,0,0.42847998936971027
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,2,128,1,fp8,fp8,0,0.32365334033966064
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,4,128,1,float16,float16,0,0.437605341275533
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,4,128,1,float16,fp8,0,0.4297706683476766
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,12,4,128,1,fp8,fp8,0,0.3265333374341329
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,1,128,1,float16,float16,0,3.811423937479655
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,1,128,1,fp8,fp8,0,2.6039093335469565
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,1,128,1,float16,fp8,0,3.837162653605143
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,2,128,1,float16,float16,0,3.700922648111979
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,2,128,1,float16,fp8,0,3.760320027669271
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,2,128,1,fp8,fp8,0,2.6259519259134927
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,4,128,1,float16,float16,0,3.74287478129069
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,4,128,1,float16,fp8,0,3.915792147318522
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,1,128,1,float16,float16,0,1.888853391011556
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,1,128,1,float16,fp8,0,1.8854880332946777
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,12,128,1,float16,float16,0,1.9005866050720215
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,1,128,1,fp8,fp8,0,1.3362399737040203
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,12,128,1,float16,fp8,0,1.8987253506978352
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,2,128,1,float16,float16,0,1.9032106399536133
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,12,4,128,1,fp8,fp8,0,2.6567625999450684
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,12,128,1,fp8,fp8,0,1.3675360679626465
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,2,128,1,float16,fp8,0,1.8723093668619792
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,2,128,1,fp8,fp8,0,1.3514986038208008
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,1,128,1,float16,float16,0,0.9897493521372477
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,4,128,1,float16,float16,0,1.914170742034912
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,4,128,1,fp8,fp8,0,1.3621279398600261
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,12,4,128,1,float16,fp8,0,1.8873012860616047
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,1,128,1,float16,fp8,0,0.9845866362253824
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,12,128,1,float16,fp8,0,0.9937706788380941
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,12,128,1,fp8,fp8,0,0.725045363108317
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,12,128,1,float16,float16,0,0.9905760288238525
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,2,128,1,float16,float16,0,0.9954133033752441
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,2,128,1,fp8,fp8,0,0.7104266484578451
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,1,128,1,fp8,fp8,0,0.706501324971517
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,2,128,1,float16,fp8,0,0.986512025197347
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,12,128,1,float16,float16,0,0.5388373136520386
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,4,128,1,float16,fp8,0,0.9947413603464762
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,4,128,1,float16,float16,0,1.005776007970174
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,12,4,128,1,fp8,fp8,0,0.7198399702707926
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,1,128,1,float16,float16,0,0.5447573264439901
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,1,128,1,float16,fp8,0,0.5386293331782023
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,1,128,1,fp8,fp8,0,0.36648531754811603
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,12,128,1,float16,fp8,0,0.5373919804890951
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,2,128,1,float16,float16,0,0.5491146643956503
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,2,128,1,fp8,fp8,0,0.36767999331156415
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,2,128,1,float16,fp8,0,0.5414880116780599
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,12,128,1,fp8,fp8,0,0.3757439851760864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,4,128,1,float16,float16,0,0.5491093397140503
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,4,128,1,float16,fp8,0,0.5442879994710287
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,1,128,1,float16,float16,0,0.27324267228444415
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,12,128,1,float16,float16,0,0.2731893261273702
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,1,128,1,float16,fp8,0,0.26923199494679767
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,12,128,1,float16,fp8,0,0.27275200684865314
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,12,4,128,1,fp8,fp8,0,0.37143464883168537
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,12,128,1,fp8,fp8,0,0.2122453252474467
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,2,128,1,float16,float16,0,0.27449599901835126
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,2,128,1,fp8,fp8,0,0.20866666237513223
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,1,128,1,fp8,fp8,0,0.20604799191157022
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,4,128,1,float16,float16,0,0.27508799235026044
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,4,128,1,float16,fp8,0,0.27380265792210895
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,4,128,1,fp8,fp8,0,0.21061867475509644
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,12,2,128,1,float16,fp8,0,0.2714719971021016
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,1,128,1,float16,float16,0,3.7045440673828125
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,1,128,1,float16,fp8,0,3.626688003540039
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,1,128,1,fp8,fp8,0,2.6332106590270996
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,2,128,1,fp8,fp8,0,2.657536029815674
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,2,128,1,float16,fp8,0,3.702063878377279
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,2,128,1,float16,float16,0,3.7518345514933267
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,4,128,1,float16,float16,0,3.6968959172566733
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,4,128,1,float16,fp8,0,3.6577278772989907
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,1,128,1,float16,float16,0,1.8386294047037761
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,1,128,1,float16,fp8,0,1.8081493377685547
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,1,128,1,fp8,fp8,0,1.3410666783650715
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,2,128,1,float16,float16,0,1.8636372884114583
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,12,128,1,float16,float16,0,1.8798826535542805
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,12,4,128,1,fp8,fp8,0,2.6926825841267905
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,12,128,1,float16,fp8,0,1.851653258005778
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,12,128,1,fp8,fp8,0,1.4193280537923176
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,2,128,1,float16,fp8,0,1.8168373107910156
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,2,128,1,fp8,fp8,0,1.3535572687784831
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,1,128,1,float16,float16,0,0.9493227005004883
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,4,128,1,float16,float16,0,1.8645973205566406
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,1,128,1,float16,fp8,0,0.9337653319040934
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,12,128,1,float16,float16,0,0.9526293277740479
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,4,128,1,float16,fp8,0,1.852549393971761
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,12,128,1,float16,fp8,0,0.9556000232696533
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,12,4,128,1,fp8,fp8,0,1.378885269165039
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,12,128,1,fp8,fp8,0,0.7354986667633057
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,1,128,1,fp8,fp8,0,0.695898691813151
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,2,128,1,float16,float16,0,0.9513866901397705
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,2,128,1,float16,fp8,0,0.9368586540222168
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,2,128,1,fp8,fp8,0,0.701311985651652
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,4,128,1,float16,float16,0,0.9654026826222738
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,4,128,1,float16,fp8,0,0.9493333498636881
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,12,4,128,1,fp8,fp8,0,0.7137227058410645
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,12,128,1,float16,float16,0,0.5153173208236694
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,1,128,1,float16,float16,0,0.510261336962382
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,12,128,1,float16,fp8,0,0.5078773498535156
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,1,128,1,float16,fp8,0,0.49985067049662274
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,12,128,1,fp8,fp8,0,0.3903786738713582
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,1,128,1,fp8,fp8,0,0.36081600189208984
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,2,128,1,float16,float16,0,0.5126346747080485
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,2,128,1,float16,fp8,0,0.5050986607869467
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,2,128,1,fp8,fp8,0,0.36373333136240643
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,4,128,1,float16,float16,0,0.5144799947738647
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,4,128,1,float16,fp8,0,0.5081653197606405
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,12,4,128,1,fp8,fp8,0,0.36883731683095294
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,1,128,1,float16,float16,0,0.25699732700983685
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,1,128,1,float16,fp8,0,0.25336533784866333
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,12,128,1,float16,float16,0,0.26154667139053345
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,12,128,1,fp8,fp8,0,0.21001599232355753
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,1,128,1,fp8,fp8,0,0.19920533895492554
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,12,128,1,float16,fp8,0,0.26184000571568805
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,2,128,1,float16,float16,0,0.2568746606508891
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,2,128,1,float16,fp8,0,0.2523039976755778
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,2,128,1,fp8,fp8,0,0.20088533560434976
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,4,128,1,float16,fp8,0,0.25391467412312824
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,4,128,1,float16,float16,0,0.26050132513046265
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,12,4,128,1,fp8,fp8,0,0.20403732856114706
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,12,128,1,float16,float16,0,0.15197867155075073
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,12,128,1,float16,fp8,0,0.15130133430163065
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,12,128,1,fp8,fp8,0,0.12311999996503194
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,1,128,1,float16,float16,0,0.149536003669103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,1,128,1,float16,fp8,0,0.14813866217931113
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,2,128,1,float16,float16,0,0.1511733333269755
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,1,128,1,fp8,fp8,0,0.11614400148391724
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,2,128,1,float16,fp8,0,0.1490506629149119
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,2,128,1,fp8,fp8,0,0.11821867028872173
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,4,128,1,float16,float16,0,0.15229866902033487
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,4,128,1,float16,fp8,0,0.15017066399256387
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,12,4,128,1,fp8,fp8,0,0.11988266309102376
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,1,128,1,float16,float16,0,2.270357290903727
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,1,128,1,fp8,fp8,0,1.6843627293904622
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,1,128,1,float16,fp8,0,2.2207306226094565
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,2,128,1,float16,fp8,0,2.214421272277832
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,2,128,1,float16,float16,0,2.2718666394551597
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,2,128,1,fp8,fp8,0,1.7029919624328613
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,4,128,1,float16,fp8,0,2.2313332557678223
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,4,128,1,float16,float16,0,2.2922239303588867
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,1,128,1,float16,float16,0,1.146938641866048
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,1,128,1,float16,fp8,0,1.1276533603668213
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,1,128,1,fp8,fp8,0,0.8603946367899576
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,2,128,1,float16,float16,0,1.1459253629048665
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,12,128,1,float16,float16,0,1.1716586748758953
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,12,128,1,float16,fp8,0,1.171562671661377
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,12,4,128,1,fp8,fp8,0,1.7324693997701008
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,12,128,1,fp8,fp8,0,0.918405294418335
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,2,128,1,float16,fp8,0,1.1293226877848308
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,2,128,1,fp8,fp8,0,0.8711946805318197
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,4,128,1,float16,float16,0,1.1550239721934001
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,4,128,1,float16,fp8,0,1.1373706658681233
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,1,128,1,float16,float16,0,0.6001013517379761
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,12,128,1,float16,float16,0,0.6138453483581543
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,12,128,1,float16,fp8,0,0.6090346574783325
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,12,4,128,1,fp8,fp8,0,0.884394645690918
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,1,128,1,float16,fp8,0,0.588640014330546
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,12,128,1,fp8,fp8,0,0.4846026500066121
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,1,128,1,fp8,fp8,0,0.45234131813049316
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,2,128,1,float16,float16,0,0.5989013512929281
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,2,128,1,float16,fp8,0,0.5919146537780762
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,2,128,1,fp8,fp8,0,0.45945600668589276
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,4,128,1,float16,float16,0,0.6057920058568319
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,4,128,1,float16,fp8,0,0.5944426854451498
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,12,4,128,1,fp8,fp8,0,0.4681386550267537
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,1,128,1,float16,float16,0,0.3265226682027181
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,12,128,1,float16,float16,0,0.3323626716931661
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,12,128,1,float16,fp8,0,0.32810133695602417
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,1,128,1,float16,fp8,0,0.3229866623878479
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,12,128,1,fp8,fp8,0,0.257695992787679
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,1,128,1,fp8,fp8,0,0.2373973329861959
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,2,128,1,float16,float16,0,0.3275199929873149
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,2,128,1,fp8,fp8,0,0.23926933606465658
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,2,128,1,float16,fp8,0,0.32466665903727215
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,12,128,1,float16,float16,0,0.17150932550430298
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,4,128,1,float16,float16,0,0.3320586681365967
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,4,128,1,float16,fp8,0,0.3262453277905782
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,12,4,128,1,fp8,fp8,0,0.24459733565648398
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,12,128,1,float16,fp8,0,0.17165333032608032
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,12,128,1,fp8,fp8,0,0.14202133814493814
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,1,128,1,float16,float16,0,0.16742932796478271
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,1,128,1,float16,fp8,0,0.1644373337427775
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,2,128,1,float16,float16,0,0.16797866423924765
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,1,128,1,fp8,fp8,0,0.13169067104657492
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,2,128,1,float16,fp8,0,0.16460266709327698
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,2,128,1,fp8,fp8,0,0.13479466239611307
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,4,128,1,float16,float16,0,0.1700106660525004
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,4,128,1,float16,fp8,0,0.16613866885503134
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,12,4,128,1,fp8,fp8,0,0.13690666357676187
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,12,128,1,float16,float16,0,0.10071999828020732
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,12,128,1,float16,fp8,0,0.09985599915186565
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,12,128,1,fp8,fp8,0,0.08591467142105103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,1,128,1,float16,float16,0,0.09944533308347066
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,1,128,1,float16,fp8,0,0.09905067086219788
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,1,128,1,fp8,fp8,0,0.07964266836643219
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,2,128,1,float16,float16,0,0.09922666351000468
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,2,128,1,float16,fp8,0,0.09891200065612793
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,4,128,1,float16,float16,0,0.09991466999053955
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,4,128,1,fp8,fp8,0,0.08096000055472057
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,4,128,1,float16,fp8,0,0.09902933239936829
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,12,2,128,1,fp8,fp8,0,0.08038400113582611
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,1,128,1,float16,fp8,0,2.2847092946370444
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,1,128,1,float16,float16,0,2.3970774014790854
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,1,128,1,fp8,fp8,0,1.8595360120137532
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,2,128,1,float16,float16,0,2.4102560679117837
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,2,128,1,fp8,fp8,0,1.8739360173543294
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,2,128,1,float16,fp8,0,2.295685291290283
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,4,128,1,float16,float16,0,2.4336959520975747
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,4,128,1,float16,fp8,0,2.3421759605407715
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,1,128,1,float16,float16,0,1.1940853595733643
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,1,128,1,float16,fp8,0,1.1566720008850098
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,1,128,1,fp8,fp8,0,0.9392320315043131
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,12,128,1,float16,float16,0,1.2671199639638264
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,12,128,1,float16,fp8,0,1.217039982477824
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,12,4,128,1,fp8,fp8,0,1.915013313293457
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,2,128,1,float16,float16,0,1.2090453306833904
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,2,128,1,float16,fp8,0,1.159546693166097
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,12,128,1,fp8,fp8,0,1.0301547050476074
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,2,128,1,fp8,fp8,0,0.947866678237915
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,1,128,1,float16,float16,0,0.6134933233261108
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,1,128,1,float16,fp8,0,0.5965493520100912
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,4,128,1,float16,float16,0,1.2219040393829346
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,4,128,1,float16,fp8,0,1.183840036392212
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,12,4,128,1,fp8,fp8,0,0.9680960178375244
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,12,128,1,float16,float16,0,0.6397173404693604
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,12,128,1,fp8,fp8,0,0.5299306710561117
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,1,128,1,fp8,fp8,0,0.48385600248972577
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,12,128,1,float16,fp8,0,0.6327626705169678
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,2,128,1,float16,float16,0,0.6154133478800455
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,2,128,1,float16,fp8,0,0.6002346674601237
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,2,128,1,fp8,fp8,0,0.4898666540781657
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,4,128,1,float16,fp8,0,0.6082293192545573
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,4,128,1,float16,float16,0,0.6206719875335693
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,1,128,1,float16,float16,0,0.3262666662534078
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,12,4,128,1,fp8,fp8,0,0.4989813168843587
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,12,128,1,float16,float16,0,0.3386773268381755
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,1,128,1,float16,fp8,0,0.32025599479675293
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,12,128,1,fp8,fp8,0,0.28277333577473956
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,1,128,1,fp8,fp8,0,0.2503040035565694
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,12,128,1,float16,fp8,0,0.3364479939142863
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,2,128,1,float16,float16,0,0.32761067152023315
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,2,128,1,float16,fp8,0,0.32128532727559406
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,2,128,1,fp8,fp8,0,0.2529333432515462
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,4,128,1,float16,float16,0,0.33293867111206055
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,4,128,1,float16,fp8,0,0.3269706765810649
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,12,4,128,1,fp8,fp8,0,0.2587253252665202
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,1,128,1,float16,float16,0,0.16701332728068033
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,12,128,1,float16,float16,0,0.17919999361038208
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,1,128,1,float16,fp8,0,0.16344533363978067
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,12,128,1,fp8,fp8,0,0.1515679955482483
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,12,128,1,float16,fp8,0,0.17775466044743857
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,1,128,1,fp8,fp8,0,0.13773866494496664
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,2,128,1,float16,float16,0,0.16900799671808878
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,2,128,1,float16,fp8,0,0.1653333306312561
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,2,128,1,fp8,fp8,0,0.13865066568056741
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,4,128,1,float16,float16,0,0.17103999853134155
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,4,128,1,float16,fp8,0,0.1671733260154724
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,12,4,128,1,fp8,fp8,0,0.1422826647758484
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,1,128,1,float16,float16,0,0.09546666344006856
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,12,128,1,float16,fp8,0,0.099263995885849
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,1,128,1,float16,fp8,0,0.09351467092831929
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,12,128,1,fp8,fp8,0,0.08842666943868001
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,12,128,1,float16,float16,0,0.100490669409434
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,1,128,1,fp8,fp8,0,0.0774239997069041
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,2,128,1,float16,float16,0,0.0957493285338084
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,2,128,1,fp8,fp8,0,0.07877866427103679
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,2,128,1,float16,fp8,0,0.0939520001411438
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,4,128,1,float16,float16,0,0.09757866462071736
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,4,128,1,float16,fp8,0,0.09541333715120952
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,12,4,128,1,fp8,fp8,0,0.0820853312810262
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,12,128,1,float16,fp8,0,0.060090666015942894
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,12,128,1,fp8,fp8,0,0.05473066866397858
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,12,128,1,float16,float16,0,0.060517330964406334
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,1,128,1,float16,float16,0,0.06078400214513143
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,1,128,1,float16,fp8,0,0.06006933252016703
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,1,128,1,fp8,fp8,0,0.05153066913286845
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,2,128,1,float16,float16,0,0.06122133135795593
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,2,128,1,float16,fp8,0,0.06055466830730438
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,2,128,1,fp8,fp8,0,0.051957334081331887
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,4,128,1,fp8,fp8,0,0.05262400209903717
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,4,128,1,float16,float16,0,0.06079466640949249
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,12,4,128,1,float16,fp8,0,0.060693333546320595
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,1,128,1,float16,float16,0,1.5257919629414876
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,1,128,1,float16,fp8,0,1.4751200675964355
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,1,128,1,fp8,fp8,0,1.235210657119751
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,2,128,1,float16,float16,0,1.5422560373942058
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,2,128,1,float16,fp8,0,1.4866612752278645
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,2,128,1,fp8,fp8,0,1.2602880001068115
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,4,128,1,float16,float16,0,1.5563680330912273
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,4,128,1,float16,fp8,0,1.5264320373535156
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,1,128,1,float16,float16,0,0.7756640116373698
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,1,128,1,float16,fp8,0,0.75709335009257
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,1,128,1,fp8,fp8,0,0.6341280142466227
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,12,128,1,float16,fp8,0,0.8017333348592123
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,12,128,1,float16,float16,0,0.8178880214691162
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,12,128,1,fp8,fp8,0,0.7108213106791178
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,12,4,128,1,fp8,fp8,0,1.2960480054219563
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,2,128,1,float16,fp8,0,0.7584373156229655
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,2,128,1,fp8,fp8,0,0.6431093215942383
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,2,128,1,float16,float16,0,0.7814880212148031
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,4,128,1,float16,float16,0,0.7891253630320231
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,1,128,1,float16,float16,0,0.4033706585566203
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,1,128,1,float16,fp8,0,0.39350398381551105
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,12,128,1,float16,float16,0,0.425653338432312
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,4,128,1,float16,fp8,0,0.7733706633249918
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,12,128,1,fp8,fp8,0,0.370746652285258
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,12,128,1,float16,fp8,0,0.41803733507792157
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,12,4,128,1,fp8,fp8,0,0.6630560159683228
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,1,128,1,fp8,fp8,0,0.3325173258781433
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,2,128,1,float16,float16,0,0.406058669090271
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,2,128,1,fp8,fp8,0,0.33824535210927326
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,2,128,1,float16,fp8,0,0.39691734313964844
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,4,128,1,float16,float16,0,0.41021867593129474
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,12,128,1,float16,float16,0,0.23015999794006348
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,4,128,1,float16,fp8,0,0.40484265486399335
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,12,128,1,float16,fp8,0,0.2264853318532308
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,1,128,1,float16,float16,0,0.21954667568206787
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,12,4,128,1,fp8,fp8,0,0.34434131781260174
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,12,128,1,fp8,fp8,0,0.19506667057673135
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,1,128,1,fp8,fp8,0,0.1722773313522339
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,2,128,1,float16,float16,0,0.22107734282811484
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,2,128,1,float16,fp8,0,0.21638399362564087
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,1,128,1,float16,fp8,0,0.21377599239349365
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,2,128,1,fp8,fp8,0,0.17588265736897787
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,4,128,1,float16,float16,0,0.22458134094874063
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,4,128,1,fp8,fp8,0,0.17917333046595255
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,12,128,1,float16,float16,0,0.12196800112724304
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,12,4,128,1,float16,fp8,0,0.21832533677419028
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,12,128,1,fp8,fp8,0,0.10986666878064473
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,12,128,1,float16,fp8,0,0.12220266461372375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,1,128,1,float16,fp8,0,0.11188800136248271
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,1,128,1,float16,float16,0,0.11430933078130086
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,1,128,1,fp8,fp8,0,0.09686400492986043
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,2,128,1,float16,float16,0,0.11540266871452332
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,2,128,1,float16,fp8,0,0.11328533291816711
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,4,128,1,float16,fp8,0,0.11525866389274597
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,4,128,1,float16,float16,0,0.11654399832089742
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,4,128,1,fp8,fp8,0,0.10251200199127197
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,12,2,128,1,fp8,fp8,0,0.09966933727264404
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,12,128,1,float16,fp8,0,0.07123733560244243
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,12,128,1,float16,float16,0,0.0718399981657664
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,12,128,1,fp8,fp8,0,0.06598933537801106
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,1,128,1,float16,float16,0,0.06815466781457265
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,1,128,1,float16,fp8,0,0.06717866659164429
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,1,128,1,fp8,fp8,0,0.05735999842484792
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,2,128,1,float16,float16,0,0.06809066732724507
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,2,128,1,fp8,fp8,0,0.05779733260472616
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,2,128,1,float16,fp8,0,0.06727466483910878
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,4,128,1,float16,fp8,0,0.06809600194295247
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,4,128,1,fp8,fp8,0,0.05925333499908447
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,12,128,1,float16,float16,0,0.04890666902065277
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,12,128,1,float16,fp8,0,0.04877866804599762
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,12,4,128,1,float16,float16,0,0.0691840002934138
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,12,128,1,fp8,fp8,0,0.044026667873064675
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,1,128,1,float16,float16,0,0.04909333089987437
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,2,128,1,float16,float16,0,0.04909333089987437
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,2,128,1,float16,fp8,0,0.048528000712394714
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,1,128,1,fp8,fp8,0,0.04223466912905375
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,1,128,1,float16,fp8,0,0.048170665899912514
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,2,128,1,fp8,fp8,0,0.04238399863243103
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,4,128,1,float16,float16,0,0.049098665515581764
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,4,128,1,float16,fp8,0,0.04850666721661886
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,12,4,128,1,fp8,fp8,0,0.04293333490689596
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,1,128,1,float16,fp8,0,1.4515199661254883
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,1,128,1,fp8,fp8,0,1.2264906565348308
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,1,128,1,float16,float16,0,1.4528640111287434
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,2,128,1,float16,fp8,0,1.4648426373799641
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,2,128,1,float16,float16,0,1.460309346516927
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,2,128,1,fp8,fp8,0,1.3077706495920818
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,4,128,1,float16,float16,0,1.5487467447916667
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,4,128,1,float16,fp8,0,1.5734507242838542
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,1,128,1,float16,float16,0,0.7348426977793375
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,1,128,1,float16,fp8,0,0.7364640235900879
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,1,128,1,fp8,fp8,0,0.6151893138885498
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,2,128,1,float16,float16,0,0.7439786593119303
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,12,128,1,float16,fp8,0,0.8325599829355875
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,12,128,1,float16,float16,0,0.8453280131022135
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,12,4,128,1,fp8,fp8,0,1.4178454081217449
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,12,128,1,fp8,fp8,0,0.729477326075236
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,2,128,1,float16,fp8,0,0.7393919626871744
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,2,128,1,fp8,fp8,0,0.662554661432902
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,4,128,1,float16,float16,0,0.7758346398671468
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,4,128,1,float16,fp8,0,0.7850240071614584
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,1,128,1,float16,float16,0,0.3805760145187378
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,12,128,1,float16,float16,0,0.4301439921061198
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,12,4,128,1,fp8,fp8,0,0.7168959776560465
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,12,128,1,float16,fp8,0,0.4267520109812419
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,12,128,1,fp8,fp8,0,0.3767999807993571
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,1,128,1,float16,fp8,0,0.381877342859904
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,1,128,1,fp8,fp8,0,0.31836267312367755
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,2,128,1,float16,float16,0,0.3880426486333211
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,2,128,1,float16,fp8,0,0.3834986686706543
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,2,128,1,fp8,fp8,0,0.33619733651479083
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,4,128,1,fp8,fp8,0,0.372165322303772
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,4,128,1,float16,float16,0,0.40038931369781494
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,12,4,128,1,float16,fp8,0,0.3960640033086141
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,12,128,1,float16,fp8,0,0.22371200720469156
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,1,128,1,float16,float16,0,0.20361600319544473
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,12,128,1,float16,float16,0,0.22827200094858804
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,12,128,1,fp8,fp8,0,0.1948960026105245
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,1,128,1,float16,fp8,0,0.2048106590906779
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,1,128,1,fp8,fp8,0,0.15940266847610474
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,2,128,1,float16,float16,0,0.20782933632532755
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,2,128,1,fp8,fp8,0,0.16339733203252158
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,2,128,1,float16,fp8,0,0.20772266387939453
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,12,128,1,float16,float16,0,0.12488533059755962
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,4,128,1,float16,float16,0,0.213210662206014
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,4,128,1,float16,fp8,0,0.2116053303082784
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,12,4,128,1,fp8,fp8,0,0.1853653391202291
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,1,128,1,float16,float16,0,0.10724266370137532
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,1,128,1,float16,fp8,0,0.10865599910418193
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,12,128,1,float16,fp8,0,0.1223306655883789
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,12,128,1,fp8,fp8,0,0.10331733028093974
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,1,128,1,fp8,fp8,0,0.08781333764394124
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,2,128,1,fp8,fp8,0,0.0906826655069987
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,2,128,1,float16,float16,0,0.10833600163459778
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,2,128,1,float16,fp8,0,0.10918399691581726
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,4,128,1,float16,float16,0,0.11291733384132385
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,4,128,1,float16,fp8,0,0.11190399527549744
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,12,4,128,1,fp8,fp8,0,0.09798933068911235
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,12,128,1,float16,float16,0,0.06615466872851054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,12,128,1,float16,fp8,0,0.06530666848023732
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,12,128,1,fp8,fp8,0,0.05843733251094818
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,1,128,1,fp8,fp8,0,0.04902400076389313
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,2,128,1,float16,float16,0,0.05938133100668589
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,1,128,1,float16,fp8,0,0.05843733251094818
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,1,128,1,float16,float16,0,0.058389330903689064
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,2,128,1,float16,fp8,0,0.05924266576766968
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,2,128,1,fp8,fp8,0,0.05156266689300537
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,4,128,1,float16,float16,0,0.06081599990526835
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,4,128,1,float16,fp8,0,0.0614026685555776
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,12,4,128,1,fp8,fp8,0,0.055642664432525635
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,12,128,1,float16,float16,0,0.03791466603676478
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,1,128,1,float16,float16,0,0.03509333233038584
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,1,128,1,float16,fp8,0,0.03562133262554804
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,12,128,1,fp8,fp8,0,0.034815999368826546
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,1,128,1,fp8,fp8,0,0.03156800071398417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,12,128,1,float16,fp8,0,0.03772266705830892
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,2,128,1,float16,float16,0,0.0354666660229365
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,2,128,1,float16,fp8,0,0.035631999373435974
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,2,128,1,fp8,fp8,0,0.03236799935499827
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,4,128,1,float16,float16,0,0.03629866739114126
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,4,128,1,float16,fp8,0,0.036746665835380554
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,12,128,1,float16,float16,0,0.03267733256022135
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,12,4,128,1,fp8,fp8,0,0.03363733241955439
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,1,128,1,float16,float16,0,0.031717332700888314
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,12,128,1,fp8,fp8,0,0.02871999889612198
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,12,128,1,float16,fp8,0,0.03259200106064478
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,1,128,1,float16,fp8,0,0.032085334261258446
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,2,128,1,float16,float16,0,0.032127998769283295
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,1,128,1,fp8,fp8,0,0.02737066646416982
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,2,128,1,float16,fp8,0,0.03223466624816259
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,2,128,1,fp8,fp8,0,0.027376001079877216
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,4,128,1,float16,float16,0,0.03244800120592117
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,4,128,1,float16,fp8,0,0.03264000018437704
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,12,4,128,1,fp8,fp8,0,0.028757333755493164
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,1,128,1,float16,fp8,0,1.1253706614176433
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,1,128,1,float16,float16,0,1.1289546489715576
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,1,128,1,fp8,fp8,0,1.0218186378479004
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,2,128,1,float16,float16,0,1.17684801419576
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,2,128,1,fp8,fp8,0,1.0857600371042888
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,2,128,1,float16,fp8,0,1.1913866996765137
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,4,128,1,float16,float16,0,1.2518826325734456
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,4,128,1,float16,fp8,0,1.2433546384175618
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,1,128,1,float16,float16,0,0.5733173290888468
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,1,128,1,float16,fp8,0,0.5697280168533325
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,1,128,1,fp8,fp8,0,0.5166506767272949
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,12,128,1,float16,fp8,0,0.6784959634145101
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,12,128,1,float16,float16,0,0.6918773651123047
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,2,128,1,float16,float16,0,0.5857760111490885
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,12,4,128,1,fp8,fp8,0,1.2013920148213704
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,2,128,1,float16,fp8,0,0.5805600086847941
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,12,128,1,fp8,fp8,0,0.6255840063095093
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,2,128,1,fp8,fp8,0,0.5660266478856405
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,4,128,1,float16,float16,0,0.6304320096969604
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,1,128,1,float16,float16,0,0.30003732442855835
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,1,128,1,float16,fp8,0,0.29682666063308716
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,4,128,1,float16,fp8,0,0.6216053167978922
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,12,4,128,1,fp8,fp8,0,0.6111146608988444
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,12,128,1,float16,fp8,0,0.3413333495457967
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,12,128,1,fp8,fp8,0,0.3208746711413066
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,12,128,1,float16,float16,0,0.35275201002756756
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,1,128,1,fp8,fp8,0,0.26739732424418133
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,2,128,1,float16,float16,0,0.30567467212677
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,2,128,1,fp8,fp8,0,0.28446932633717853
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,2,128,1,float16,fp8,0,0.3012479941050212
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,4,128,1,float16,float16,0,0.3202986717224121
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,4,128,1,float16,fp8,0,0.3136533300081889
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,12,4,128,1,fp8,fp8,0,0.31218665838241577
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,12,128,1,float16,float16,0,0.18780267238616943
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,12,128,1,fp8,fp8,0,0.16758400201797485
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,12,128,1,float16,fp8,0,0.18236800034840903
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,1,128,1,float16,float16,0,0.16059733430544534
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,1,128,1,float16,fp8,0,0.1586240033308665
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,1,128,1,fp8,fp8,0,0.13398399949073792
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,2,128,1,float16,float16,0,0.16314133008321127
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,2,128,1,float16,fp8,0,0.16149333119392395
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,4,128,1,float16,float16,0,0.1712053418159485
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,4,128,1,float16,fp8,0,0.16868799924850464
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,2,128,1,fp8,fp8,0,0.141157329082489
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,12,4,128,1,fp8,fp8,0,0.1564906636873881
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,1,128,1,float16,float16,0,0.0870293378829956
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,12,128,1,float16,float16,0,0.10672533512115479
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,1,128,1,float16,fp8,0,0.08676266670227051
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,12,128,1,float16,fp8,0,0.10361599922180176
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,12,128,1,fp8,fp8,0,0.08860266208648682
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,1,128,1,fp8,fp8,0,0.07504533231258392
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,2,128,1,float16,float16,0,0.08946133653322856
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,2,128,1,float16,fp8,0,0.0885759989420573
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,2,128,1,fp8,fp8,0,0.07691200077533722
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,4,128,1,float16,float16,0,0.09221866726875305
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,4,128,1,float16,fp8,0,0.0920960009098053
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,12,4,128,1,fp8,fp8,0,0.0849226713180542
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,12,128,1,float16,float16,0,0.05731200178464254
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,12,128,1,float16,fp8,0,0.05629866818586985
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,1,128,1,float16,float16,0,0.04910400013128916
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,1,128,1,float16,fp8,0,0.04853333532810211
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,1,128,1,fp8,fp8,0,0.04266666869322459
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,2,128,1,float16,fp8,0,0.049770668148994446
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,2,128,1,float16,float16,0,0.04942933221658071
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,2,128,1,fp8,fp8,0,0.04541333516438802
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,12,128,1,fp8,fp8,0,0.05271466573079427
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,4,128,1,float16,float16,0,0.052298665046691895
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,4,128,1,float16,fp8,0,0.05188799897829691
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,12,4,128,1,fp8,fp8,0,0.04855466882387797
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,12,128,1,float16,float16,0,0.033546666304270424
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,12,128,1,float16,fp8,0,0.032960000137488045
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,12,128,1,fp8,fp8,0,0.032229334115982056
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,1,128,1,float16,float16,0,0.031173333525657654
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,1,128,1,fp8,fp8,0,0.029322666426499683
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,1,128,1,float16,fp8,0,0.03129599988460541
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,2,128,1,float16,float16,0,0.03161066770553589
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,2,128,1,float16,fp8,0,0.0317546675602595
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,4,128,1,fp8,fp8,0,0.03151999910672506
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,2,128,1,fp8,fp8,0,0.02995733420054118
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,4,128,1,float16,float16,0,0.03250666707754135
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,12,128,1,float16,float16,0,0.02595199892918269
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,12,4,128,1,float16,fp8,0,0.03229333211978277
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,1,128,1,float16,float16,0,0.024864000578721363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,12,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,1,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,2,128,1,float16,fp8,0,0.025386666258176167
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,2,128,1,float16,float16,0,0.025370667378107708
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,12,128,1,float16,fp8,0,0.026133333643277485
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,2,128,1,fp8,fp8,0,0.023434666295846302
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,1,128,1,fp8,fp8,0,0.022895999252796173
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,4,128,1,float16,float16,0,0.02566933383544286
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,4,128,1,float16,fp8,0,0.02603733291228612
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,12,4,128,1,fp8,fp8,0,0.024714666108290356
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,12,128,1,float16,float16,0,0.022042666872342426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,12,128,1,float16,fp8,0,0.02237333357334137
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,1,128,1,float16,float16,0,0.022181332111358643
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,1,128,1,float16,fp8,0,0.022287999590237934
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,1,128,1,fp8,fp8,0,0.019487999379634857
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,2,128,1,float16,fp8,0,0.022437334060668945
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,2,128,1,float16,float16,0,0.02178666740655899
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,12,128,1,fp8,fp8,0,0.02015999952952067
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,2,128,1,fp8,fp8,0,0.020181333025296528
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,4,128,1,float16,float16,0,0.022357332209746044
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,4,128,1,float16,fp8,0,0.022629333039124806
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,12,4,128,1,fp8,fp8,0,0.019946667055288952
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,1,128,1,float16,float16,0,0.4809120098749797
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,1,128,1,fp8,fp8,0,0.42449601491292316
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,1,128,1,float16,fp8,0,0.4803253412246704
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,2,128,1,float16,fp8,0,0.49536534150441486
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,2,128,1,fp8,fp8,0,0.4757066567738851
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,2,128,1,float16,float16,0,0.49753065903981525
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,4,128,1,float16,float16,0,0.5407573382059733
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,4,128,1,float16,fp8,0,0.5372000137964884
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,1,128,1,float16,float16,0,0.2516533335049947
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,1,128,1,fp8,fp8,0,0.21831466754277548
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,1,128,1,float16,fp8,0,0.249834676583608
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,12,128,1,float16,float16,0,0.3142079909642537
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,12,4,128,1,fp8,fp8,0,0.5280479987462362
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,12,128,1,float16,fp8,0,0.3046613335609436
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,2,128,1,float16,float16,0,0.25782932837804157
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,12,128,1,fp8,fp8,0,0.2784000039100647
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,2,128,1,float16,fp8,0,0.2573653260866801
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,2,128,1,fp8,fp8,0,0.23796266317367554
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,1,128,1,float16,float16,0,0.136543999115626
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,4,128,1,float16,fp8,0,0.26917866865793866
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,4,128,1,float16,float16,0,0.27586134274800617
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,12,128,1,float16,fp8,0,0.16245333353678384
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,12,4,128,1,fp8,fp8,0,0.27244265874226886
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,12,128,1,fp8,fp8,0,0.1500320037206014
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,12,128,1,float16,float16,0,0.16758400201797485
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,1,128,1,float16,fp8,0,0.136245330174764
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,2,128,1,float16,float16,0,0.13941333691279092
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,1,128,1,fp8,fp8,0,0.11885866522789001
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,2,128,1,fp8,fp8,0,0.12627200285593668
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,4,128,1,float16,float16,0,0.1476693352063497
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,4,128,1,float16,fp8,0,0.14433067043622336
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,2,128,1,float16,fp8,0,0.13878400127092996
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,12,128,1,float16,float16,0,0.09571199615796407
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,12,4,128,1,fp8,fp8,0,0.1428053379058838
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,1,128,1,float16,float16,0,0.07671999931335449
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,1,128,1,float16,fp8,0,0.07674666742483775
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,1,128,1,fp8,fp8,0,0.0664213349421819
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,12,128,1,float16,fp8,0,0.09444266557693481
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,2,128,1,float16,fp8,0,0.07795199751853943
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,12,128,1,fp8,fp8,0,0.08227199812730153
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,2,128,1,float16,float16,0,0.07915199796358745
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,2,128,1,fp8,fp8,0,0.07074666519959767
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,4,128,1,fp8,fp8,0,0.07698133091131847
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,4,128,1,float16,fp8,0,0.08201600114504497
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,12,128,1,float16,float16,0,0.05258133510748545
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,12,4,128,1,float16,float16,0,0.08301333089669545
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,12,128,1,float16,fp8,0,0.05120000243186951
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,12,128,1,fp8,fp8,0,0.04870399832725525
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,1,128,1,fp8,fp8,0,0.03872533390919367
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,1,128,1,float16,float16,0,0.04429866870244344
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,2,128,1,float16,float16,0,0.04452266792456309
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,2,128,1,float16,fp8,0,0.04458666841189066
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,2,128,1,fp8,fp8,0,0.04035733391841253
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,1,128,1,float16,fp8,0,0.043968002001444496
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,4,128,1,float16,fp8,0,0.04683733483155569
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,4,128,1,float16,float16,0,0.04715733230113983
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,12,4,128,1,fp8,fp8,0,0.04451733330885569
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,12,128,1,float16,fp8,0,0.03145600110292435
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,1,128,1,float16,float16,0,0.029674666623274486
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,12,128,1,fp8,fp8,0,0.030266667405764263
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,1,128,1,fp8,fp8,0,0.02737066646416982
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,2,128,1,float16,float16,0,0.030421334008375805
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,12,128,1,float16,float16,0,0.03179733455181122
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,1,128,1,float16,fp8,0,0.02998399982849757
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,2,128,1,float16,fp8,0,0.03047466774781545
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,2,128,1,fp8,fp8,0,0.027829334139823914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,4,128,1,float16,float16,0,0.03105599929889043
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,4,128,1,float16,fp8,0,0.031040000418821972
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,12,4,128,1,fp8,fp8,0,0.029520000020662945
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,12,128,1,float16,float16,0,0.023007998863856
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,1,128,1,float16,float16,0,0.022064000368118286
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,1,128,1,float16,fp8,0,0.022096000611782074
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,1,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,12,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,2,128,1,float16,float16,0,0.022634667654832203
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,2,128,1,float16,fp8,0,0.02235200007756551
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,12,128,1,fp8,fp8,0,0.022634667654832203
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,2,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,4,128,1,float16,float16,0,0.023120000958442688
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,4,128,1,float16,fp8,0,0.023242667317390442
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,12,4,128,1,fp8,fp8,0,0.022570667167504627
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,12,128,1,float16,fp8,0,0.019621333728233974
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,12,128,1,fp8,fp8,0,0.01834133391578992
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,1,128,1,float16,fp8,0,0.018944000204404194
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,1,128,1,fp8,fp8,0,0.017701332767804463
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,12,128,1,float16,float16,0,0.019333332777023315
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,2,128,1,float16,float16,0,0.0185759998857975
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,2,128,1,float16,fp8,0,0.019066666563351948
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,2,128,1,fp8,fp8,0,0.017792000124851864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,1,128,1,float16,float16,0,0.018672000616788864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,4,128,1,float16,float16,0,0.018922666708628338
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,4,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,12,4,128,1,fp8,fp8,0,0.017968000223239262
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,12,128,1,float16,fp8,0,0.018698666244745255
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,12,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,1,128,1,float16,float16,0,0.01811733345190684
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,1,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,2,128,1,float16,float16,0,0.01798933371901512
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,1,128,1,float16,fp8,0,0.018063999712467194
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,2,128,1,float16,fp8,0,0.018266666680574417
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,12,128,1,float16,float16,0,0.01770666614174843
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,2,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,4,128,1,float16,float16,0,0.01811733345190684
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,4,128,1,float16,fp8,0,0.018474667022625606
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,12,4,128,1,fp8,fp8,0,0.01738133281469345
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,1,128,1,float16,float16,0,0.23483200867970785
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,1,128,1,fp8,fp8,0,0.22126932938893637
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,1,128,1,float16,fp8,0,0.2358293334643046
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,2,128,1,fp8,fp8,0,0.24670400222142538
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,2,128,1,float16,fp8,0,0.23865065972010294
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,4,128,1,float16,float16,0,0.25494933128356934
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,2,128,1,float16,float16,0,0.23902400334676108
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,4,128,1,float16,fp8,0,0.2553386688232422
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,1,128,1,float16,float16,0,0.12920000155766806
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,12,128,1,float16,float16,0,0.16621333360671997
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,1,128,1,float16,fp8,0,0.12959466377894083
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,12,4,128,1,fp8,fp8,0,0.2730773289998372
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,2,128,1,float16,float16,0,0.13198933005332947
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,12,128,1,float16,fp8,0,0.16157333056131998
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,1,128,1,fp8,fp8,0,0.1169653336207072
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,12,128,1,fp8,fp8,0,0.14797866344451904
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,2,128,1,float16,fp8,0,0.1306986709435781
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,2,128,1,fp8,fp8,0,0.1256053348382314
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,4,128,1,float16,float16,0,0.13989866773287454
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,4,128,1,float16,fp8,0,0.13767466942469278
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,12,4,128,1,fp8,fp8,0,0.14152000347773233
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,12,128,1,fp8,fp8,0,0.08195733527342479
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,12,128,1,float16,float16,0,0.09153067072232564
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,12,128,1,float16,fp8,0,0.09016000231107076
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,1,128,1,float16,fp8,0,0.07269333302974701
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,1,128,1,float16,float16,0,0.0720000018676122
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,1,128,1,fp8,fp8,0,0.06657599906126659
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,2,128,1,float16,float16,0,0.07353599866231282
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,2,128,1,float16,fp8,0,0.07374399900436401
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,2,128,1,fp8,fp8,0,0.07087466617425282
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,4,128,1,float16,float16,0,0.07726400097211202
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,4,128,1,fp8,fp8,0,0.07573866844177246
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,12,128,1,float16,fp8,0,0.0491893341143926
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,12,4,128,1,float16,fp8,0,0.07665066421031952
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,1,128,1,float16,float16,0,0.04132800052563349
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,12,128,1,fp8,fp8,0,0.04758933186531067
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,1,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,12,128,1,float16,float16,0,0.050154666105906166
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,2,128,1,float16,float16,0,0.0422986646493276
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,2,128,1,float16,fp8,0,0.04232533276081085
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,1,128,1,fp8,fp8,0,0.03851733356714249
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,4,128,1,float16,fp8,0,0.044853334625562034
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,4,128,1,float16,float16,0,0.044026667873064675
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,4,128,1,fp8,fp8,0,0.04446400205294291
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,12,128,1,float16,float16,0,0.03201066702604294
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,12,2,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,12,128,1,float16,fp8,0,0.03162133445342382
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,1,128,1,float16,fp8,0,0.02924266705910365
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,12,128,1,fp8,fp8,0,0.030069333811601002
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,1,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,1,128,1,float16,float16,0,0.029146666328112285
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,2,128,1,float16,float16,0,0.029487999776999157
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,4,128,1,float16,float16,0,0.030506665507952373
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,4,128,1,fp8,fp8,0,0.029535998900731403
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,4,128,1,float16,fp8,0,0.0308693324526151
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,2,128,1,float16,fp8,0,0.029690665503342945
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,12,128,1,float16,float16,0,0.022458667556444805
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,12,128,1,float16,fp8,0,0.022789334257443745
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,12,2,128,1,fp8,fp8,0,0.02792533238728841
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,12,128,1,fp8,fp8,0,0.022613334159056347
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,1,128,1,float16,float16,0,0.021029333273569744
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,1,128,1,float16,fp8,0,0.021530665457248688
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,1,128,1,fp8,fp8,0,0.020784000555674236
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,2,128,1,float16,fp8,0,0.02149333308140437
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,2,128,1,float16,float16,0,0.021551998953024547
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,4,128,1,float16,fp8,0,0.022426667312781017
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,12,128,1,float16,float16,0,0.017765333255132038
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,2,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,12,128,1,float16,fp8,0,0.018021332720915478
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,4,128,1,float16,float16,0,0.02203733225663503
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,12,4,128,1,fp8,fp8,0,0.022629333039124806
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,12,128,1,fp8,fp8,0,0.018133333573738735
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,1,128,1,float16,fp8,0,0.017690667261679966
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,1,128,1,fp8,fp8,0,0.01759999990463257
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,2,128,1,float16,fp8,0,0.017786666750907898
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,1,128,1,float16,float16,0,0.01710933322707812
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,4,128,1,float16,float16,0,0.01748266691962878
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,4,128,1,float16,fp8,0,0.017914666483799618
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,2,128,1,fp8,fp8,0,0.017658667018016178
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,2,128,1,float16,float16,0,0.01738133281469345
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,12,4,128,1,fp8,fp8,0,0.017909333109855652
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,12,128,1,float16,float16,0,0.01684800038735072
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,12,128,1,float16,fp8,0,0.017263999829689663
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,1,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,1,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,12,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,2,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,1,128,1,float16,float16,0,0.016735999534527462
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,2,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,4,128,1,float16,float16,0,0.016837333639462788
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,2,128,1,float16,float16,0,0.016314666718244553
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,4,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,12,4,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,12,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,1,128,1,float16,float16,0,0.016224000602960587
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,12,128,1,float16,float16,0,0.01599466676513354
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,12,128,1,fp8,fp8,0,0.016447999825080235
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,1,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,2,128,1,float16,float16,0,0.01624533285697301
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,2,128,1,fp8,fp8,0,0.016704000532627106
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,4,128,1,float16,float16,0,0.01621866722901662
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,1,128,1,fp8,fp8,0,0.01658133293191592
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,4,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,2,128,1,float16,fp8,0,0.016607999801635742
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,12,4,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,1,128,1,float16,float16,0,0.15239466230074564
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,1,128,1,float16,fp8,0,0.15359466274579367
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,1,128,1,fp8,fp8,0,0.15877866744995117
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,2,128,1,float16,float16,0,0.15481066703796387
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,2,128,1,fp8,fp8,0,0.16683733463287354
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,4,128,1,float16,float16,0,0.16342399517695108
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,2,128,1,float16,fp8,0,0.1530346671740214
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,4,128,1,float16,fp8,0,0.15972266594568887
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,12,4,128,1,fp8,fp8,0,0.1811466614405314
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,1,128,1,float16,float16,0,0.08288000027338664
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,12,128,1,float16,float16,0,0.09362133344014485
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,12,128,1,float16,fp8,0,0.0909493366877238
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,1,128,1,float16,fp8,0,0.08387200037638347
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,1,128,1,fp8,fp8,0,0.08781866232554118
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,12,128,1,fp8,fp8,0,0.10215466221173604
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,2,128,1,fp8,fp8,0,0.09243200222651164
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,2,128,1,float16,fp8,0,0.08478933572769165
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,4,128,1,float16,float16,0,0.08971200386683147
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,4,128,1,float16,fp8,0,0.08842133482297261
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,2,128,1,float16,float16,0,0.08455466230710347
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,12,4,128,1,fp8,fp8,0,0.09725333253542583
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,1,128,1,float16,float16,0,0.046223998069763184
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,12,128,1,fp8,fp8,0,0.058464000622431435
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,1,128,1,fp8,fp8,0,0.048997332652409874
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,1,128,1,float16,fp8,0,0.04717866579691569
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,12,128,1,float16,fp8,0,0.05148266752560934
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,12,128,1,float16,float16,0,0.052746668457984924
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,2,128,1,fp8,fp8,0,0.050437331199645996
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,2,128,1,float16,float16,0,0.04673600196838379
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,4,128,1,float16,float16,0,0.04955733319123586
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,2,128,1,float16,fp8,0,0.04721599817276001
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,12,128,1,float16,float16,0,0.030415999392668407
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,4,128,1,fp8,fp8,0,0.055488000313440956
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,12,128,1,float16,fp8,0,0.02961066613594691
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,12,4,128,1,float16,fp8,0,0.04957866668701172
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,12,128,1,fp8,fp8,0,0.03336533407370249
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,1,128,1,float16,fp8,0,0.029461334149042766
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,1,128,1,float16,float16,0,0.029194665451844532
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,2,128,1,float16,float16,0,0.0295413335164388
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,2,128,1,float16,fp8,0,0.029861333469549816
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,1,128,1,fp8,fp8,0,0.03072533259789149
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,4,128,1,fp8,fp8,0,0.0332640012105306
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,2,128,1,fp8,fp8,0,0.031194667021433514
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,4,128,1,float16,float16,0,0.03046400099992752
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,12,4,128,1,float16,fp8,0,0.03070933371782303
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,12,128,1,float16,fp8,0,0.0240639994541804
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,12,128,1,fp8,fp8,0,0.025775998830795288
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,12,128,1,float16,float16,0,0.023728000621000927
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,1,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,1,128,1,float16,float16,0,0.023045333723227184
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,2,128,1,float16,fp8,0,0.023423999547958374
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,4,128,1,float16,float16,0,0.023717333873112995
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,2,128,1,fp8,fp8,0,0.02462933212518692
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,4,128,1,fp8,fp8,0,0.02601066728432973
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,1,128,1,fp8,fp8,0,0.02403733382622401
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,4,128,1,float16,fp8,0,0.023936000963052113
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,12,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,12,2,128,1,float16,float16,0,0.023178666830062866
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,12,128,1,fp8,fp8,0,0.018165333817402523
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,12,128,1,float16,float16,0,0.01700266698996226
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,1,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,1,128,1,float16,float16,0,0.01637866720557213
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,2,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,4,128,1,float16,float16,0,0.016704000532627106
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,1,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,2,128,1,float16,float16,0,0.016506666938463848
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,4,128,1,fp8,fp8,0,0.017685333887736004
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,2,128,1,fp8,fp8,0,0.01747200017174085
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,12,128,1,float16,float16,0,0.015674666812022526
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,12,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,12,4,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,12,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,1,128,1,fp8,fp8,0,0.016538667182127636
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,1,128,1,float16,float16,0,0.015439999600251516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,2,128,1,fp8,fp8,0,0.016575999557971954
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,1,128,1,float16,fp8,0,0.01607999950647354
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,2,128,1,float16,fp8,0,0.015909332782030106
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,2,128,1,float16,float16,0,0.01565333331624667
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,4,128,1,float16,fp8,0,0.016176000237464905
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,4,128,1,float16,float16,0,0.015658666690190632
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,12,4,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,12,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,12,128,1,float16,float16,0,0.014549333602190018
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,1,128,1,float16,float16,0,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,1,128,1,fp8,fp8,0,0.016389333953460056
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,12,128,1,fp8,fp8,0,0.016021333634853363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,1,128,1,float16,fp8,0,0.015850666910409927
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,2,128,1,float16,fp8,0,0.01579733317097028
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,2,128,1,fp8,fp8,0,0.01617066686352094
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,4,128,1,float16,fp8,0,0.015669333438078564
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,2,128,1,float16,float16,0,0.014842666685581207
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,4,128,1,fp8,fp8,0,0.016330666840076447
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,12,4,128,1,float16,float16,0,0.014815999815861383
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,12,128,1,fp8,fp8,0,0.01589866727590561
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,1,128,1,float16,float16,0,0.01481066644191742
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,12,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,12,128,1,float16,float16,0,0.01440000037352244
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,2,128,1,float16,float16,0,0.014864000181357065
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,1,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,2,128,1,fp8,fp8,0,0.01609066625436147
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,4,128,1,float16,float16,0,0.014858666807413101
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,2,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,1,128,1,fp8,fp8,0,0.01575999955336253
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,4,128,1,float16,fp8,0,0.01553600033124288
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,12,4,128,1,fp8,fp8,0,0.016016000260909397
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,1,128,1,float16,float16,0,0.10689600308736165
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,1,128,1,float16,fp8,0,0.10689600308736165
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,1,128,1,fp8,fp8,0,0.12961600224177042
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,2,128,1,float16,float16,0,0.10828800002733867
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,2,128,1,float16,fp8,0,0.10834133625030518
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,2,128,1,fp8,fp8,0,0.133050670226415
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,12,128,1,float16,float16,0,0.06402666866779327
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,4,128,1,fp8,fp8,0,0.13911466797192892
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,4,128,1,float16,float16,0,0.11327999830245972
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,12,128,1,float16,fp8,0,0.06342400113741557
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,12,4,128,1,float16,fp8,0,0.11272533734639485
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,12,128,1,fp8,fp8,0,0.07966400186220805
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,1,128,1,float16,float16,0,0.057818666100502014
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,1,128,1,fp8,fp8,0,0.07049066821734111
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,2,128,1,float16,float16,0,0.05881600081920624
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,2,128,1,fp8,fp8,0,0.07259200016657512
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,1,128,1,float16,fp8,0,0.05907199780146281
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,4,128,1,float16,fp8,0,0.06196799874305725
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,2,128,1,float16,fp8,0,0.05972266693909963
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,4,128,1,float16,float16,0,0.06140799820423126
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,12,4,128,1,fp8,fp8,0,0.0759626676638921
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,12,128,1,float16,float16,0,0.03473600000143051
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,12,128,1,float16,fp8,0,0.03489600121974945
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,1,128,1,float16,fp8,0,0.03533333291610082
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,12,128,1,fp8,fp8,0,0.04381866753101349
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,1,128,1,float16,float16,0,0.034976000587145485
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,1,128,1,fp8,fp8,0,0.04109866668780645
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,2,128,1,fp8,fp8,0,0.04218133290608724
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,4,128,1,float16,float16,0,0.03581333408753077
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,2,128,1,float16,float16,0,0.03501333296298981
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,2,128,1,float16,fp8,0,0.035258665680885315
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,4,128,1,float16,fp8,0,0.036346666514873505
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,12,4,128,1,fp8,fp8,0,0.04342933495839437
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,1,128,1,float16,float16,0,0.02404800057411194
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,1,128,1,float16,fp8,0,0.024325333535671234
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,12,128,1,float16,float16,0,0.025098666548728943
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,12,128,1,float16,fp8,0,0.025040000677108765
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,1,128,1,fp8,fp8,0,0.027765333652496338
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,2,128,1,float16,fp8,0,0.024442667762438457
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,2,128,1,float16,float16,0,0.024458666642506916
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,12,128,1,fp8,fp8,0,0.02956266701221466
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,2,128,1,fp8,fp8,0,0.028399998943010967
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,12,128,1,float16,float16,0,0.01897066707412402
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,4,128,1,float16,float16,0,0.025055999557177227
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,12,128,1,float16,fp8,0,0.01882133384545644
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,1,128,1,float16,float16,0,0.018272000054518383
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,12,128,1,fp8,fp8,0,0.02160000056028366
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,4,128,1,fp8,fp8,0,0.02962133288383484
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,1,128,1,float16,fp8,0,0.018464000274737675
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,12,4,128,1,float16,fp8,0,0.025045332809289295
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,1,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,2,128,1,fp8,fp8,0,0.02094399929046631
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,4,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,2,128,1,float16,float16,0,0.018325333793958027
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,4,128,1,float16,float16,0,0.018453333526849747
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,2,128,1,float16,fp8,0,0.018565333137909572
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,12,4,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,12,128,1,float16,fp8,0,0.015743999431530636
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,1,128,1,float16,float16,0,0.015061333775520325
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,12,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,1,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,12,128,1,float16,float16,0,0.015146666516860327
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,1,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,2,128,1,float16,float16,0,0.014778666198253632
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,2,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,2,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,4,128,1,float16,float16,0,0.015024000157912573
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,4,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,12,4,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,12,128,1,float16,fp8,0,0.014725333700577417
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,1,128,1,float16,float16,0,0.014368000129858652
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,12,128,1,fp8,fp8,0,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,1,128,1,fp8,fp8,0,0.016224000602960587
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,12,128,1,float16,float16,0,0.01431999976436297
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,2,128,1,float16,float16,0,0.014458666245142618
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,1,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,2,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,4,128,1,float16,float16,0,0.014432000617186228
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,2,128,1,fp8,fp8,0,0.016410666207472484
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,4,128,1,fp8,fp8,0,0.016597333053747814
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,12,4,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,12,128,1,float16,fp8,0,0.014335999886194864
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,12,128,1,float16,float16,0,0.013829333086808523
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,1,128,1,float16,float16,0,0.014208000153303146
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,12,128,1,fp8,fp8,0,0.01589866727590561
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,2,128,1,float16,float16,0,0.014149333039919535
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,1,128,1,float16,fp8,0,0.014709333578745524
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,1,128,1,fp8,fp8,0,0.01599466676513354
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,2,128,1,fp8,fp8,0,0.016095999628305435
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,2,128,1,float16,fp8,0,0.014570667097965876
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,4,128,1,float16,fp8,0,0.014607999473810196
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,12,128,1,float16,fp8,0,0.013855999956528345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,1,128,1,float16,float16,0,0.014021333307027817
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,12,128,1,float16,float16,0,0.013568000247081121
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,12,128,1,fp8,fp8,0,0.01562133307258288
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,4,128,1,fp8,fp8,0,0.016095999628305435
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,12,4,128,1,float16,float16,0,0.014058666924635569
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,1,128,1,fp8,fp8,0,0.015893333901961643
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,2,128,1,float16,float16,0,0.013951999445756277
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,2,128,1,float16,fp8,0,0.014592000593741735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,4,128,1,float16,float16,0,0.013877333452304205
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,2,128,1,fp8,fp8,0,0.01602666700879733
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,1,128,1,float16,fp8,0,0.014474666366974512
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,4,128,1,float16,fp8,0,0.014778666198253632
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,12,4,128,1,fp8,fp8,0,0.01578666642308235
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,1,128,1,float16,float16,0,0.09139733513196309
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,1,128,1,fp8,fp8,0,0.10994133353233337
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,1,128,1,float16,fp8,0,0.09225599964459737
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,2,128,1,float16,fp8,0,0.0921493371327718
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,2,128,1,float16,float16,0,0.09151466687520345
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,2,128,1,fp8,fp8,0,0.11181333661079407
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,4,128,1,float16,float16,0,0.09410666426022847
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,12,128,1,float16,float16,0,0.04788800080617269
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,4,128,1,fp8,fp8,0,0.11641066273053487
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,12,4,128,1,float16,fp8,0,0.09493866562843323
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,12,128,1,fp8,fp8,0,0.06384533147017162
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,12,128,1,float16,fp8,0,0.04764799773693085
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,1,128,1,float16,float16,0,0.051914667089780174
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,1,128,1,float16,fp8,0,0.05166399975617727
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,2,128,1,float16,float16,0,0.05227733155091604
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,1,128,1,fp8,fp8,0,0.0622026671965917
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,2,128,1,float16,fp8,0,0.05204799771308899
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,4,128,1,float16,float16,0,0.053157334526379905
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,4,128,1,float16,fp8,0,0.05373333394527435
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,4,128,1,fp8,fp8,0,0.06429333488146464
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,12,2,128,1,fp8,fp8,0,0.06361599763234456
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,12,128,1,float16,fp8,0,0.03141333411137263
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,12,128,1,float16,float16,0,0.031290667752424874
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,1,128,1,float16,fp8,0,0.032618666688601174
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,1,128,1,float16,float16,0,0.03249600032965342
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,12,128,1,fp8,fp8,0,0.04002666721741358
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,1,128,1,fp8,fp8,0,0.038634667793909706
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,2,128,1,float16,float16,0,0.03279466678698858
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,2,128,1,fp8,fp8,0,0.03896533449490865
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,2,128,1,float16,fp8,0,0.03319466610749563
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,4,128,1,float16,float16,0,0.033386667569478355
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,4,128,1,float16,fp8,0,0.03363733241955439
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,12,4,128,1,fp8,fp8,0,0.0400693342089653
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,1,128,1,float16,float16,0,0.021546666820844013
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,12,128,1,float16,fp8,0,0.02149333308140437
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,12,128,1,fp8,fp8,0,0.025045332809289295
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,12,128,1,float16,float16,0,0.021066665649414062
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,2,128,1,float16,float16,0,0.021007999777793884
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,1,128,1,float16,fp8,0,0.021488000949223835
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,2,128,1,float16,fp8,0,0.021482666333516438
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,1,128,1,fp8,fp8,0,0.02454400062561035
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,2,128,1,fp8,fp8,0,0.024890666206677754
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,4,128,1,float16,float16,0,0.021717332303524017
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,4,128,1,float16,fp8,0,0.021386665602525074
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,12,128,1,float16,float16,0,0.01757866640885671
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,12,128,1,fp8,fp8,0,0.020576000213623047
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,12,4,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,1,128,1,float16,fp8,0,0.01782400036851565
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,12,128,1,float16,fp8,0,0.018090666582187016
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,1,128,1,float16,float16,0,0.01721599946419398
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,1,128,1,fp8,fp8,0,0.020560000091791153
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,2,128,1,float16,float16,0,0.017616000026464462
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,2,128,1,float16,fp8,0,0.017738666385412216
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,2,128,1,fp8,fp8,0,0.020687999824682873
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,4,128,1,float16,fp8,0,0.017808000246683758
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,12,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,4,128,1,float16,float16,0,0.017477333545684814
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,12,128,1,float16,float16,0,0.01451733335852623
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,1,128,1,float16,float16,0,0.014394666999578476
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,12,4,128,1,fp8,fp8,0,0.020762667059898376
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,1,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,12,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,1,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,2,128,1,float16,float16,0,0.014453332871198654
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,4,128,1,float16,float16,0,0.014432000617186228
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,2,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,4,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,2,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,12,128,1,float16,fp8,0,0.014346666634082794
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,12,4,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,12,128,1,fp8,fp8,0,0.016229332735141117
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,12,128,1,float16,float16,0,0.014085333794355392
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,1,128,1,float16,float16,0,0.01404800017674764
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,1,128,1,fp8,fp8,0,0.0161013330022494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,1,128,1,float16,fp8,0,0.01471466695268949
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,2,128,1,float16,float16,0,0.013888000200192133
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,2,128,1,float16,fp8,0,0.014778666198253632
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,4,128,1,float16,float16,0,0.01404800017674764
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,12,128,1,float16,float16,0,0.013749333719412485
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,4,128,1,fp8,fp8,0,0.016293333222468693
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,4,128,1,float16,fp8,0,0.014661333213249842
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,12,128,1,float16,fp8,0,0.014325333138306936
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,12,2,128,1,fp8,fp8,0,0.01621333385507266
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,1,128,1,float16,float16,0,0.013861333330472311
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,1,128,1,float16,fp8,0,0.014655999839305878
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,12,128,1,fp8,fp8,0,0.015728000551462173
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,1,128,1,fp8,fp8,0,0.015850666910409927
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,2,128,1,float16,float16,0,0.013776000589132309
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,2,128,1,fp8,fp8,0,0.01607999950647354
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,2,128,1,float16,fp8,0,0.01444799949725469
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,4,128,1,float16,float16,0,0.013781332721312841
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,4,128,1,float16,fp8,0,0.014666666587193808
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,12,4,128,1,fp8,fp8,0,0.016058667252461117
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,12,128,1,float16,fp8,0,0.013733333597580591
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,12,128,1,fp8,fp8,0,0.015626666446526844
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,12,128,1,float16,float16,0,0.01321600005030632
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,1,128,1,float16,float16,0,0.013717333475748697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,1,128,1,float16,fp8,0,0.014287999520699183
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,2,128,1,fp8,fp8,0,0.015856000284353893
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,2,128,1,float16,float16,0,0.013765333841244379
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,2,128,1,float16,fp8,0,0.014549333602190018
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,1,128,1,fp8,fp8,0,0.015685333559910457
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,4,128,1,float16,float16,0,0.013909333695967993
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,4,128,1,float16,fp8,0,0.014469332993030548
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,12,4,128,1,fp8,fp8,0,0.01595199977358182
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,1,128,1,fp8,fp8,0,5.289344151814778
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,1,128,1,float16,fp8,0,8.094314575195312
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,1,128,1,float16,float16,0,8.209909439086914
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,2,128,1,fp8,fp8,0,5.211061477661133
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,2,128,1,float16,float16,0,8.43661880493164
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,2,128,1,float16,fp8,0,8.238517125447592
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,4,128,1,float16,fp8,0,8.137626647949219
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,4,128,1,float16,float16,0,8.362229029337565
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,1,128,1,float16,float16,0,4.110255877176921
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,1,128,1,float16,fp8,0,4.08021863301595
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,1,128,1,fp8,fp8,0,2.5990187327067056
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,8,128,1,fp8,fp8,0,2.5804640452067056
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,8,128,1,float16,float16,0,4.009759902954102
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,8,128,1,float16,fp8,0,3.7741066614786782
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,2,128,1,float16,float16,0,4.130778630574544
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,8,4,128,1,fp8,fp8,0,5.350053151448567
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,2,128,1,float16,fp8,0,3.9321654637654624
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,2,128,1,fp8,fp8,0,2.635082721710205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,1,128,1,float16,float16,0,2.061072031656901
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,8,128,1,float16,fp8,0,1.9887946446736653
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,4,128,1,float16,float16,0,4.080944061279297
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,8,128,1,float16,float16,0,1.9803306261698406
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,4,128,1,float16,fp8,0,4.000570615132649
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,8,4,128,1,fp8,fp8,0,2.6706291834513345
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,8,128,1,fp8,fp8,0,1.3509546915690105
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,1,128,1,float16,fp8,0,2.05129607518514
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,1,128,1,fp8,fp8,0,1.3805386225382488
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,2,128,1,float16,float16,0,2.0493226051330566
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,2,128,1,float16,fp8,0,2.0347679456075034
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,8,128,1,float16,float16,0,1.0983786582946777
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,4,128,1,fp8,fp8,0,1.3911733627319336
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,4,128,1,float16,float16,0,2.059882640838623
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,2,128,1,fp8,fp8,0,1.3789280255635579
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,8,128,1,float16,fp8,0,1.095589319864909
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,8,4,128,1,float16,fp8,0,2.0589067141215005
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,1,128,1,float16,float16,0,1.1189706325531006
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,8,128,1,fp8,fp8,0,0.6787467002868652
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,1,128,1,fp8,fp8,0,0.6986453533172607
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,2,128,1,float16,float16,0,1.1258666515350342
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,1,128,1,float16,fp8,0,1.1202293237050374
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,2,128,1,float16,fp8,0,1.110746701558431
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,2,128,1,fp8,fp8,0,0.7032159964243571
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,4,128,1,float16,float16,0,1.1313119729359944
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,4,128,1,float16,fp8,0,1.1162933508555095
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,8,4,128,1,fp8,fp8,0,0.7040159702301025
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,1,128,1,float16,float16,0,4.465397198994954
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,1,128,1,float16,fp8,0,4.604890823364258
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,1,128,1,fp8,fp8,0,3.021637280782064
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,2,128,1,float16,float16,0,4.654778798421224
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,2,128,1,float16,fp8,0,4.676389376322429
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,2,128,1,fp8,fp8,0,3.0274025599161782
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,4,128,1,float16,float16,0,4.782997449239095
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,4,128,1,float16,fp8,0,4.559397379557292
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,1,128,1,float16,float16,0,2.3202239672342935
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,1,128,1,float16,fp8,0,2.299584070841471
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,8,128,1,float16,float16,0,2.2653493881225586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,8,128,1,float16,fp8,0,2.283738613128662
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,1,128,1,fp8,fp8,0,1.5565333366394043
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,8,4,128,1,fp8,fp8,0,3.04201602935791
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,8,128,1,fp8,fp8,0,1.5446559588114421
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,2,128,1,float16,float16,0,2.3155946731567383
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,2,128,1,float16,fp8,0,2.3102240562438965
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,2,128,1,fp8,fp8,0,1.5710080464680989
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,4,128,1,float16,float16,0,2.323194662729899
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,1,128,1,float16,float16,0,1.233189344406128
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,8,128,1,float16,float16,0,1.198800007502238
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,8,128,1,float16,fp8,0,1.1850346724192302
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,1,128,1,float16,fp8,0,1.2212533156077068
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,8,128,1,fp8,fp8,0,0.7886613210042318
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,4,128,1,fp8,fp8,0,1.5823200543721516
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,8,4,128,1,float16,fp8,0,2.3323307037353516
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,1,128,1,fp8,fp8,0,0.7908853689829508
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,2,128,1,float16,float16,0,1.237013339996338
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,2,128,1,float16,fp8,0,1.21670397122701
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,2,128,1,fp8,fp8,0,0.7921333312988281
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,4,128,1,float16,float16,0,1.2410293420155842
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,4,128,1,float16,fp8,0,1.227562665939331
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,8,4,128,1,fp8,fp8,0,0.8073546886444092
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,8,128,1,float16,float16,0,0.5908799966176351
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,1,128,1,float16,float16,0,0.5897440115610758
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,1,128,1,float16,fp8,0,0.5836639801661173
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,8,128,1,float16,fp8,0,0.5818613370259603
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,1,128,1,fp8,fp8,0,0.42787734667460126
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,8,128,1,fp8,fp8,0,0.4182826677958171
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,2,128,1,float16,float16,0,0.5903573433558146
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,2,128,1,float16,fp8,0,0.586021343866984
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,2,128,1,fp8,fp8,0,0.42980798085530597
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,4,128,1,float16,float16,0,0.5942986806233724
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,4,128,1,float16,fp8,0,0.5886346499125162
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,8,4,128,1,fp8,fp8,0,0.4339040120442708
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,1,128,1,fp8,fp8,0,2.1781973838806152
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,1,128,1,float16,fp8,0,3.2002719243367515
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,1,128,1,float16,float16,0,3.260357220967611
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,2,128,1,float16,float16,0,3.2433172861735025
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,2,128,1,fp8,fp8,0,2.180191993713379
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,2,128,1,float16,fp8,0,3.217146555582682
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,4,128,1,float16,float16,0,3.196762720743815
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,4,128,1,float16,fp8,0,3.210048039754232
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,1,128,1,float16,float16,0,1.650261402130127
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,8,128,1,float16,fp8,0,1.6394400596618652
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,8,128,1,float16,float16,0,1.6460533142089844
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,1,128,1,float16,fp8,0,1.6557973225911458
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,8,4,128,1,fp8,fp8,0,2.224463939666748
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,2,128,1,float16,float16,0,1.664021333058675
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,1,128,1,fp8,fp8,0,1.1336533228556316
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,8,128,1,fp8,fp8,0,1.1392266750335693
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,2,128,1,float16,fp8,0,1.642357349395752
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,2,128,1,fp8,fp8,0,1.1445386409759521
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,8,128,1,float16,float16,0,0.8770986398061117
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,8,128,1,fp8,fp8,0,0.5756586790084839
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,4,128,1,float16,float16,0,1.683626651763916
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,8,128,1,float16,fp8,0,0.8650240103403727
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,1,128,1,float16,float16,0,0.8927413622538248
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,4,128,1,fp8,fp8,0,1.1553066571553547
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,8,4,128,1,float16,fp8,0,1.675546646118164
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,1,128,1,float16,fp8,0,0.8886773586273193
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,1,128,1,fp8,fp8,0,0.5798399845759074
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,2,128,1,fp8,fp8,0,0.5795626640319824
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,2,128,1,float16,fp8,0,0.8935253620147705
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,2,128,1,float16,float16,0,0.8959786891937256
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,4,128,1,float16,float16,0,0.8981119791666666
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,4,128,1,float16,fp8,0,0.8928266366322836
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,8,128,1,float16,float16,0,0.4333759943644206
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,8,4,128,1,fp8,fp8,0,0.589136004447937
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,1,128,1,float16,float16,0,0.4411253531773885
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,1,128,1,float16,fp8,0,0.4344533284505208
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,1,128,1,fp8,fp8,0,0.31919999917348224
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,8,128,1,float16,fp8,0,0.42815999190012616
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,8,128,1,fp8,fp8,0,0.3152160048484802
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,2,128,1,float16,float16,0,0.43753600120544434
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,2,128,1,float16,fp8,0,0.43514664967854816
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,2,128,1,fp8,fp8,0,0.320906658967336
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,4,128,1,float16,float16,0,0.4439520041147868
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,4,128,1,float16,fp8,0,0.4381706714630127
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,8,4,128,1,fp8,fp8,0,0.32335466146469116
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,1,128,1,float16,float16,0,4.186911900838216
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,1,128,1,fp8,fp8,0,2.8667093912760415
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,1,128,1,float16,fp8,0,4.2366987864176435
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,2,128,1,float16,float16,0,4.225221316019694
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,2,128,1,float16,fp8,0,4.406480153401692
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,2,128,1,fp8,fp8,0,2.8972107569376626
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,4,128,1,float16,float16,0,4.315098762512207
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,4,128,1,float16,fp8,0,4.420223871866862
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,1,128,1,float16,float16,0,2.157893339792887
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,1,128,1,float16,fp8,0,2.1317920684814453
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,8,128,1,float16,fp8,0,2.0826452573140464
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,8,128,1,float16,float16,0,2.113647937774658
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,8,128,1,fp8,fp8,0,1.501461346944173
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,8,4,128,1,fp8,fp8,0,2.94538148244222
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,1,128,1,fp8,fp8,0,1.4737280209859211
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,2,128,1,float16,float16,0,2.1367467244466147
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,2,128,1,fp8,fp8,0,1.4938507080078125
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,2,128,1,float16,fp8,0,2.145125389099121
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,4,128,1,float16,float16,0,2.140127976735433
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,4,128,1,float16,fp8,0,2.141551971435547
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,8,4,128,1,fp8,fp8,0,1.5096853574117024
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,8,128,1,float16,float16,0,1.105082670847575
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,8,128,1,float16,fp8,0,1.0973920027414958
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,8,128,1,fp8,fp8,0,0.786730686823527
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,1,128,1,float16,float16,0,1.1222026348114014
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,1,128,1,float16,fp8,0,1.1132373015085857
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,1,128,1,fp8,fp8,0,0.7799946467081705
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,2,128,1,float16,float16,0,1.1187146504720051
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,2,128,1,float16,fp8,0,1.11516269048055
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,2,128,1,fp8,fp8,0,0.788319985071818
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,4,128,1,float16,float16,0,1.1243573029836018
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,4,128,1,fp8,fp8,0,0.7996906439463297
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,8,4,128,1,float16,fp8,0,1.115834633509318
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,8,128,1,float16,float16,0,0.6066240072250366
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,1,128,1,float16,float16,0,0.6144746541976929
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,1,128,1,float16,fp8,0,0.6092160145441691
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,8,128,1,float16,fp8,0,0.5955520073572794
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,8,128,1,fp8,fp8,0,0.4014666477839152
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,1,128,1,fp8,fp8,0,0.3978506724039714
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,2,128,1,float16,fp8,0,0.6093386809031168
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,2,128,1,float16,float16,0,0.6179893414179484
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,2,128,1,fp8,fp8,0,0.4023679892222087
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,1,128,1,float16,float16,0,0.2949440081914266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,8,128,1,float16,float16,0,0.2908639907836914
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,4,128,1,float16,float16,0,0.6193866729736328
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,4,128,1,fp8,fp8,0,0.4060426553090413
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,8,128,1,float16,fp8,0,0.28829334179560345
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,8,4,128,1,float16,fp8,0,0.6170026858647665
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,8,128,1,fp8,fp8,0,0.22126400470733643
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,1,128,1,float16,fp8,0,0.29123733441034955
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,1,128,1,fp8,fp8,0,0.21896000703175864
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,2,128,1,float16,fp8,0,0.29180266459782916
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,2,128,1,fp8,fp8,0,0.22190399964650473
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,4,128,1,float16,float16,0,0.2952959934870402
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,4,128,1,float16,fp8,0,0.29499733448028564
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,4,128,1,fp8,fp8,0,0.22558399041493735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,8,2,128,1,float16,float16,0,0.2934666673342387
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,1,128,1,float16,float16,0,2.5257867177327475
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,1,128,1,float16,fp8,0,2.4901599884033203
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,1,128,1,fp8,fp8,0,1.7610613505045574
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,2,128,1,float16,float16,0,2.4931413332621255
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,2,128,1,float16,fp8,0,2.482202688852946
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,2,128,1,fp8,fp8,0,1.772640069325765
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,4,128,1,float16,float16,0,2.5238614082336426
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,4,128,1,float16,fp8,0,2.508128007253011
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,1,128,1,float16,float16,0,1.284277359644572
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,1,128,1,float16,fp8,0,1.273861328760783
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,1,128,1,fp8,fp8,0,0.9120906988779703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,8,128,1,float16,float16,0,1.2937920093536377
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,8,128,1,float16,fp8,0,1.283893346786499
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,8,128,1,fp8,fp8,0,0.9286346435546875
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,2,128,1,float16,fp8,0,1.2742773691813152
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,8,4,128,1,fp8,fp8,0,1.8110987345377605
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,2,128,1,fp8,fp8,0,0.9203519821166992
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,2,128,1,float16,float16,0,1.2905759811401367
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,8,128,1,float16,float16,0,0.6741387049357096
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,4,128,1,float16,float16,0,1.298192024230957
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,4,128,1,float16,fp8,0,1.2991092999776204
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,8,4,128,1,fp8,fp8,0,0.9384533564249674
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,1,128,1,float16,float16,0,0.6856160163879395
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,1,128,1,float16,fp8,0,0.6749333540598551
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,8,128,1,float16,fp8,0,0.6755413214365641
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,8,128,1,fp8,fp8,0,0.4885546763737996
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,1,128,1,fp8,fp8,0,0.464197317759196
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,2,128,1,float16,float16,0,0.6866827011108398
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,2,128,1,float16,fp8,0,0.6832640171051025
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,2,128,1,fp8,fp8,0,0.47043200333913165
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,4,128,1,float16,float16,0,0.6932586828867594
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,1,128,1,float16,float16,0,0.33515199025472003
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,4,128,1,fp8,fp8,0,0.48234665393829346
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,1,128,1,float16,fp8,0,0.3297813336054484
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,8,128,1,float16,fp8,0,0.3317013382911682
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,8,4,128,1,float16,fp8,0,0.6849493185679117
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,8,128,1,fp8,fp8,0,0.2572266658147176
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,1,128,1,fp8,fp8,0,0.253493328889211
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,8,128,1,float16,float16,0,0.3385973374048869
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,2,128,1,float16,float16,0,0.3365333477656047
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,2,128,1,fp8,fp8,0,0.2544479966163635
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,2,128,1,float16,fp8,0,0.3309653401374817
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,8,128,1,float16,float16,0,0.19407467047373453
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,4,128,1,float16,fp8,0,0.33480532964070636
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,4,128,1,fp8,fp8,0,0.25880000988642377
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,8,4,128,1,float16,float16,0,0.33793067932128906
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,8,128,1,float16,fp8,0,0.19261332352956137
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,1,128,1,float16,float16,0,0.19437867403030396
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,1,128,1,float16,fp8,0,0.19195733467737833
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,8,128,1,fp8,fp8,0,0.1511679987112681
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,2,128,1,float16,float16,0,0.194815993309021
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,2,128,1,fp8,fp8,0,0.14620266358057657
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,1,128,1,fp8,fp8,0,0.14562132954597473
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,2,128,1,float16,fp8,0,0.19298666715621948
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,4,128,1,float16,float16,0,0.1975253423055013
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,4,128,1,float16,fp8,0,0.19529066483179727
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,8,4,128,1,fp8,fp8,0,0.1509813368320465
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,1,128,1,float16,float16,0,2.4704480171203613
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,1,128,1,fp8,fp8,0,1.7795626322428386
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,1,128,1,float16,fp8,0,2.4166080156962075
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,2,128,1,float16,float16,0,2.468949317932129
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,2,128,1,float16,fp8,0,2.4146453539530435
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,2,128,1,fp8,fp8,0,1.80839999516805
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,4,128,1,float16,float16,0,2.5239893595377603
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,4,128,1,float16,fp8,0,2.448618729909261
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,1,128,1,float16,float16,0,1.2471946875254314
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,1,128,1,float16,fp8,0,1.2355999946594238
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,8,128,1,float16,float16,0,1.2481173674265544
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,1,128,1,fp8,fp8,0,0.9160799980163574
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,8,128,1,float16,fp8,0,1.247226635615031
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,2,128,1,float16,float16,0,1.2524747053782146
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,8,128,1,fp8,fp8,0,0.9589707056681315
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,8,4,128,1,fp8,fp8,0,1.8581120173136394
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,2,128,1,float16,fp8,0,1.2321653366088867
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,2,128,1,fp8,fp8,0,0.9293013413747152
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,1,128,1,float16,float16,0,0.6509600083033243
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,8,128,1,float16,float16,0,0.6525439818700155
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,8,128,1,float16,fp8,0,0.651093324025472
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,4,128,1,float16,float16,0,1.2614826361338298
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,4,128,1,float16,fp8,0,1.2579253514607747
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,8,4,128,1,fp8,fp8,0,0.946890672047933
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,8,128,1,fp8,fp8,0,0.5051146745681763
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,1,128,1,float16,fp8,0,0.6430773337682089
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,1,128,1,fp8,fp8,0,0.4814399878184001
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,2,128,1,float16,float16,0,0.6515466769536337
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,2,128,1,fp8,fp8,0,0.4866186777750651
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,2,128,1,float16,fp8,0,0.6489760080973307
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,4,128,1,float16,float16,0,0.6633173227310181
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,4,128,1,float16,fp8,0,0.6516053279240926
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,8,4,128,1,fp8,fp8,0,0.5007040103276571
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,8,128,1,float16,fp8,0,0.35600535074869794
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,1,128,1,float16,float16,0,0.35585065682729083
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,1,128,1,float16,fp8,0,0.35328535238901776
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,8,128,1,float16,float16,0,0.36046401659647626
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,8,128,1,fp8,fp8,0,0.261952002843221
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,1,128,1,fp8,fp8,0,0.24842133124669394
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,2,128,1,float16,float16,0,0.3586346705754598
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,2,128,1,float16,fp8,0,0.3567253351211548
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,2,128,1,fp8,fp8,0,0.25089067220687866
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,4,128,1,float16,float16,0,0.36187732219696045
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,4,128,1,float16,fp8,0,0.3605920076370239
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,8,128,1,float16,float16,0,0.17727466424306235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,8,128,1,float16,fp8,0,0.17670400937398276
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,1,128,1,float16,float16,0,0.17384533087412515
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,8,4,128,1,fp8,fp8,0,0.25780800978342694
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,8,128,1,fp8,fp8,0,0.14497599999109903
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,1,128,1,float16,fp8,0,0.17097065846125284
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,2,128,1,float16,float16,0,0.17566933234532675
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,1,128,1,fp8,fp8,0,0.136543999115626
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,2,128,1,float16,fp8,0,0.1722453236579895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,2,128,1,fp8,fp8,0,0.13920533657073975
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,4,128,1,float16,float16,0,0.17815999190012613
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,4,128,1,float16,fp8,0,0.17620267470677695
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,8,128,1,float16,float16,0,0.09964266419410706
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,8,4,128,1,fp8,fp8,0,0.14300266901652017
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,8,128,1,float16,fp8,0,0.09875733653704326
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,8,128,1,fp8,fp8,0,0.08562666177749634
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,1,128,1,float16,fp8,0,0.09804800152778625
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,1,128,1,float16,float16,0,0.09922666351000468
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,1,128,1,fp8,fp8,0,0.08028266827265422
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,2,128,1,float16,fp8,0,0.09820800026257832
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,2,128,1,fp8,fp8,0,0.08065600196520488
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,4,128,1,float16,float16,0,0.09973333279291789
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,2,128,1,float16,float16,0,0.09964799880981445
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,4,128,1,fp8,fp8,0,0.08202133576075236
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,8,4,128,1,float16,fp8,0,0.09869333108266194
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,1,128,1,float16,float16,0,1.5258293151855469
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,1,128,1,float16,fp8,0,1.500197410583496
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,1,128,1,fp8,fp8,0,1.1474560101826985
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,2,128,1,float16,float16,0,1.5364640553792317
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,2,128,1,float16,fp8,0,1.5063893000284831
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,2,128,1,fp8,fp8,0,1.1667040189107258
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,4,128,1,float16,fp8,0,1.518880049387614
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,4,128,1,float16,float16,0,1.5669439633687336
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,1,128,1,float16,float16,0,0.7824959754943848
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,8,128,1,float16,float16,0,0.8011626402537028
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,8,128,1,float16,fp8,0,0.7845226923624674
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,1,128,1,float16,fp8,0,0.7674506505330404
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,1,128,1,fp8,fp8,0,0.590826670328776
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,8,4,128,1,fp8,fp8,0,1.1901333332061768
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,8,128,1,fp8,fp8,0,0.6270080010096232
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,2,128,1,float16,float16,0,0.7863946755727133
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,2,128,1,float16,fp8,0,0.7732746601104736
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,2,128,1,fp8,fp8,0,0.6027466853459676
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,8,128,1,float16,float16,0,0.4190133412679036
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,1,128,1,float16,float16,0,0.4158133268356323
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,8,128,1,fp8,fp8,0,0.3325653274854024
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,8,128,1,float16,fp8,0,0.41600000858306885
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,4,128,1,float16,float16,0,0.7971626917521158
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,4,128,1,fp8,fp8,0,0.6155573527018229
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,1,128,1,float16,fp8,0,0.408896009127299
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,8,4,128,1,float16,fp8,0,0.787722667058309
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,1,128,1,fp8,fp8,0,0.3020053307215373
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,2,128,1,float16,float16,0,0.41793068250020343
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,2,128,1,float16,fp8,0,0.4110506772994995
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,4,128,1,float16,float16,0,0.4222773313522339
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,2,128,1,fp8,fp8,0,0.30664000908533734
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,4,128,1,float16,fp8,0,0.4152959982554118
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,8,128,1,float16,float16,0,0.21500800053278604
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,8,4,128,1,fp8,fp8,0,0.31938666105270386
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,1,128,1,float16,float16,0,0.20664532979329428
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,8,128,1,float16,fp8,0,0.21156267325083414
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,1,128,1,float16,fp8,0,0.2028426726659139
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,8,128,1,fp8,fp8,0,0.17596266667048135
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,2,128,1,float16,float16,0,0.2067199945449829
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,2,128,1,fp8,fp8,0,0.16758400201797485
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,2,128,1,float16,fp8,0,0.20520534118016562
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,1,128,1,fp8,fp8,0,0.1641759971777598
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,4,128,1,float16,float16,0,0.21246933937072754
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,8,128,1,float16,float16,0,0.12037332852681477
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,4,128,1,float16,fp8,0,0.20834134022394815
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,8,4,128,1,fp8,fp8,0,0.17106133699417114
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,8,128,1,float16,fp8,0,0.11806399623552959
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,8,128,1,fp8,fp8,0,0.10221333305040996
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,1,128,1,float16,float16,0,0.11664533615112305
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,1,128,1,float16,fp8,0,0.11451733112335205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,1,128,1,fp8,fp8,0,0.0909546713034312
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,2,128,1,float16,float16,0,0.11800533533096313
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,2,128,1,float16,fp8,0,0.11518399914105733
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,2,128,1,fp8,fp8,0,0.0937653382619222
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,4,128,1,float16,float16,0,0.11889066298802693
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,4,128,1,float16,fp8,0,0.11850666999816895
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,8,4,128,1,fp8,fp8,0,0.09935466448465984
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,8,128,1,float16,fp8,0,0.07327466706434886
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,1,128,1,float16,float16,0,0.07474133372306824
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,1,128,1,float16,fp8,0,0.07383466760317485
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,8,128,1,fp8,fp8,0,0.06281599899133046
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,1,128,1,fp8,fp8,0,0.06062399844328562
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,8,128,1,float16,float16,0,0.07317866881688435
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,2,128,1,float16,float16,0,0.07521600027879079
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,2,128,1,float16,fp8,0,0.07421866556008656
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,2,128,1,fp8,fp8,0,0.06087466577688853
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,4,128,1,float16,float16,0,0.07489066819349925
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,4,128,1,float16,fp8,0,0.07460799813270569
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,8,4,128,1,fp8,fp8,0,0.06202666461467743
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,1,128,1,float16,float16,0,1.62281068166097
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,1,128,1,float16,fp8,0,1.5657280286153157
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,1,128,1,fp8,fp8,0,1.2827839851379395
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,2,128,1,float16,fp8,0,1.5843520164489746
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,2,128,1,float16,float16,0,1.6488159497578938
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,2,128,1,fp8,fp8,0,1.2902560234069824
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,4,128,1,float16,float16,0,1.6688480377197266
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,4,128,1,float16,fp8,0,1.6067627271016438
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,1,128,1,float16,fp8,0,0.7952799797058105
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,1,128,1,float16,float16,0,0.8232479890187582
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,8,128,1,float16,fp8,0,0.8328746954600016
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,1,128,1,fp8,fp8,0,0.6485280195871989
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,2,128,1,float16,float16,0,0.82587202390035
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,8,128,1,float16,float16,0,0.8496267000834147
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,8,128,1,fp8,fp8,0,0.7036746342976888
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,8,4,128,1,fp8,fp8,0,1.3206666310628254
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,2,128,1,float16,fp8,0,0.8037652969360352
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,2,128,1,fp8,fp8,0,0.6600799957911173
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,8,128,1,float16,float16,0,0.4364480177561442
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,4,128,1,float16,float16,0,0.8468000094095866
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,8,128,1,float16,fp8,0,0.4320426781972249
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,4,128,1,float16,fp8,0,0.818511962890625
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,1,128,1,float16,float16,0,0.4208853244781494
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,8,4,128,1,fp8,fp8,0,0.6754186948140463
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,8,128,1,fp8,fp8,0,0.3666079839070638
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,1,128,1,float16,fp8,0,0.4137706756591797
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,1,128,1,fp8,fp8,0,0.3365226586659749
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,2,128,1,float16,float16,0,0.42789868513743085
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,2,128,1,float16,fp8,0,0.41764267285664874
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,2,128,1,fp8,fp8,0,0.34220266342163086
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,4,128,1,float16,float16,0,0.43137065569559735
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,4,128,1,fp8,fp8,0,0.35261865456899005
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,8,4,128,1,float16,fp8,0,0.427567998568217
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,8,128,1,float16,float16,0,0.2390186587969462
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,1,128,1,float16,float16,0,0.2291839917500814
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,1,128,1,float16,fp8,0,0.22453866402308145
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,8,128,1,float16,fp8,0,0.23386667172114053
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,1,128,1,fp8,fp8,0,0.174127995967865
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,8,128,1,fp8,fp8,0,0.19263466199239096
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,2,128,1,float16,float16,0,0.23072532812754312
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,2,128,1,float16,fp8,0,0.2283680041631063
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,2,128,1,fp8,fp8,0,0.17866667111714682
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,4,128,1,float16,fp8,0,0.23186665773391724
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,4,128,1,float16,float16,0,0.23650133609771729
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,8,128,1,float16,float16,0,0.12031466762224834
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,1,128,1,float16,float16,0,0.11342933773994446
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,8,128,1,float16,fp8,0,0.12050666411717732
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,8,4,128,1,fp8,fp8,0,0.18268267313639322
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,8,128,1,fp8,fp8,0,0.10653866330782573
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,1,128,1,fp8,fp8,0,0.09550399581591289
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,2,128,1,float16,fp8,0,0.11344533165295918
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,2,128,1,float16,float16,0,0.11596266428629558
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,2,128,1,fp8,fp8,0,0.09867200255393982
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,1,128,1,float16,fp8,0,0.11289067069689433
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,4,128,1,float16,float16,0,0.11860799789428711
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,4,128,1,fp8,fp8,0,0.10246933499972026
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,8,4,128,1,float16,fp8,0,0.11635733644167583
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,8,128,1,float16,float16,0,0.06738666693369548
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,1,128,1,float16,float16,0,0.06539733211199443
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,8,128,1,float16,fp8,0,0.06729066868623097
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,1,128,1,fp8,fp8,0,0.05547733108202616
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,1,128,1,float16,fp8,0,0.06413866579532623
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,2,128,1,float16,float16,0,0.06532800197601318
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,8,128,1,fp8,fp8,0,0.06239999830722809
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,2,128,1,fp8,fp8,0,0.05602133274078369
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,2,128,1,float16,fp8,0,0.06439466774463654
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,4,128,1,float16,float16,0,0.06624533236026764
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,4,128,1,float16,fp8,0,0.06530133386452992
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,8,4,128,1,fp8,fp8,0,0.057989334066708885
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,8,128,1,float16,fp8,0,0.0531626691420873
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,8,128,1,fp8,fp8,0,0.04540266593297323
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,1,128,1,float16,float16,0,0.054197331269582115
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,8,128,1,float16,float16,0,0.05348266661167145
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,1,128,1,fp8,fp8,0,0.04442666471004486
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,1,128,1,float16,fp8,0,0.053504000107447304
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,2,128,1,float16,float16,0,0.05436266462008158
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,2,128,1,float16,fp8,0,0.054144000013669334
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,2,128,1,fp8,fp8,0,0.045237332582473755
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,4,128,1,float16,fp8,0,0.054197331269582115
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,4,128,1,float16,float16,0,0.05473066866397858
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,8,4,128,1,fp8,fp8,0,0.04572799801826477
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,1,128,1,float16,float16,0,1.0538453261057537
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,1,128,1,fp8,fp8,0,0.8573493162790934
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,1,128,1,float16,fp8,0,1.003109296162923
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,2,128,1,float16,float16,0,1.0682506561279297
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,2,128,1,fp8,fp8,0,0.8705920378367106
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,2,128,1,float16,fp8,0,1.0149013201395671
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,4,128,1,float16,float16,0,1.080954631169637
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,4,128,1,float16,fp8,0,1.0397653579711914
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,1,128,1,float16,float16,0,0.5353493293126425
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,1,128,1,float16,fp8,0,0.5167839924494425
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,8,128,1,float16,float16,0,0.559551994005839
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,8,128,1,float16,fp8,0,0.5485226710637411
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,1,128,1,fp8,fp8,0,0.4407999912897746
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,8,4,128,1,fp8,fp8,0,0.9045759836832682
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,8,128,1,fp8,fp8,0,0.49397865931193036
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,2,128,1,float16,float16,0,0.5424960056940714
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,2,128,1,float16,fp8,0,0.5235786835352579
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,2,128,1,fp8,fp8,0,0.4481920003890991
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,4,128,1,float16,float16,0,0.5510346492131551
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,4,128,1,float16,fp8,0,0.5329813162485758
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,8,128,1,float16,float16,0,0.29557865858078003
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,8,128,1,float16,fp8,0,0.29253333806991577
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,8,4,128,1,fp8,fp8,0,0.46561598777770996
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,1,128,1,float16,float16,0,0.2813919981320699
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,1,128,1,float16,fp8,0,0.27402667204538983
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,8,128,1,fp8,fp8,0,0.2610293428103129
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,1,128,1,fp8,fp8,0,0.22339200973510742
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,2,128,1,float16,float16,0,0.2840213378270467
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,2,128,1,float16,fp8,0,0.27796266476313275
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,2,128,1,fp8,fp8,0,0.22984000047047934
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,4,128,1,float16,float16,0,0.2903146743774414
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,4,128,1,fp8,fp8,0,0.2396213412284851
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,8,4,128,1,float16,fp8,0,0.2831733425458272
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,1,128,1,float16,float16,0,0.14338133732477823
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,1,128,1,float16,fp8,0,0.14190399646759033
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,8,128,1,float16,fp8,0,0.153957337141037
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,8,128,1,float16,float16,0,0.1552853286266327
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,8,128,1,fp8,fp8,0,0.13553067048390707
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,1,128,1,fp8,fp8,0,0.12267733613650005
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,2,128,1,float16,float16,0,0.14501866698265076
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,2,128,1,float16,fp8,0,0.14266133308410645
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,2,128,1,fp8,fp8,0,0.12470400333404541
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,4,128,1,float16,float16,0,0.15013333161671957
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,4,128,1,float16,fp8,0,0.14670933286348978
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,8,128,1,float16,float16,0,0.0848640004793803
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,8,128,1,float16,fp8,0,0.08497066299120586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,1,128,1,float16,float16,0,0.07779199878374736
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,8,128,1,fp8,fp8,0,0.07850666840871175
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,1,128,1,float16,fp8,0,0.07713599999745686
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,8,4,128,1,fp8,fp8,0,0.12997333208719888
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,1,128,1,fp8,fp8,0,0.06673066814740498
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,2,128,1,float16,float16,0,0.07923733194669087
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,2,128,1,float16,fp8,0,0.07818666597207387
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,2,128,1,fp8,fp8,0,0.06943466762701671
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,4,128,1,float16,float16,0,0.08140266438325246
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,4,128,1,float16,fp8,0,0.08081600069999695
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,8,4,128,1,fp8,fp8,0,0.07394666473070781
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,8,128,1,float16,fp8,0,0.04885333279768626
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,1,128,1,float16,float16,0,0.04889066517353058
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,8,128,1,fp8,fp8,0,0.04545066754023234
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,8,128,1,float16,float16,0,0.04914666712284088
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,1,128,1,fp8,fp8,0,0.04219200213750204
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,1,128,1,float16,fp8,0,0.04809600114822388
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,2,128,1,float16,float16,0,0.049039999643961586
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,2,128,1,float16,fp8,0,0.04854399959246317
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,2,128,1,fp8,fp8,0,0.04273599882920583
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,4,128,1,float16,fp8,0,0.04881600042184194
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,4,128,1,float16,float16,0,0.04924266537030538
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,8,128,1,float16,float16,0,0.04357333481311798
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,1,128,1,float16,float16,0,0.044213334719340004
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,8,128,1,float16,fp8,0,0.04350399971008301
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,8,128,1,fp8,fp8,0,0.0379573330283165
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,8,4,128,1,fp8,fp8,0,0.04419200122356415
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,1,128,1,float16,fp8,0,0.043893332282702126
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,1,128,1,fp8,fp8,0,0.03710933278004328
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,2,128,1,fp8,fp8,0,0.03730133424202601
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,2,128,1,float16,fp8,0,0.04371733466784159
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,2,128,1,float16,float16,0,0.044112001856168113
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,4,128,1,float16,float16,0,0.044437333941459656
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,4,128,1,float16,fp8,0,0.04383466641108195
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,8,4,128,1,fp8,fp8,0,0.03782933453718821
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,1,128,1,float16,float16,0,0.9798986911773682
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,1,128,1,fp8,fp8,0,0.8293600082397461
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,1,128,1,float16,fp8,0,0.9715466499328613
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,2,128,1,float16,float16,0,0.9888373215993246
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,2,128,1,float16,fp8,0,0.982367992401123
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,2,128,1,fp8,fp8,0,0.8813973267873129
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,4,128,1,float16,float16,0,1.034117301305135
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,4,128,1,float16,fp8,0,1.0364747047424316
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,1,128,1,float16,float16,0,0.502618670463562
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,1,128,1,float16,fp8,0,0.5008266766866049
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,1,128,1,fp8,fp8,0,0.4163466691970825
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,8,128,1,float16,float16,0,0.5655306577682495
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,8,128,1,float16,fp8,0,0.5523840188980103
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,8,128,1,fp8,fp8,0,0.49664000670115155
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,2,128,1,float16,float16,0,0.5080480178197225
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,2,128,1,float16,fp8,0,0.5059200127919515
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,8,4,128,1,fp8,fp8,0,0.9680906931559244
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,2,128,1,fp8,fp8,0,0.44756801923116046
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,1,128,1,float16,float16,0,0.26361600557963055
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,8,128,1,float16,float16,0,0.29334400097529095
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,4,128,1,float16,float16,0,0.5240266720453898
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,1,128,1,float16,fp8,0,0.26154667139053345
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,4,128,1,float16,fp8,0,0.5203306674957275
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,8,4,128,1,fp8,fp8,0,0.491317351659139
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,8,128,1,fp8,fp8,0,0.257258673508962
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,1,128,1,fp8,fp8,0,0.2208319902420044
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,8,128,1,float16,fp8,0,0.2881493369738261
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,2,128,1,float16,float16,0,0.26610666513442993
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,2,128,1,float16,fp8,0,0.26442132393519086
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,2,128,1,fp8,fp8,0,0.22957332928975424
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,4,128,1,float16,float16,0,0.27614400784174603
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,4,128,1,float16,fp8,0,0.2733599940935771
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,8,4,128,1,fp8,fp8,0,0.25406932830810547
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,8,128,1,float16,float16,0,0.1600320041179657
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,1,128,1,float16,float16,0,0.14403200149536133
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,1,128,1,float16,fp8,0,0.1439466675122579
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,8,128,1,fp8,fp8,0,0.1323360006014506
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,1,128,1,fp8,fp8,0,0.11171733339627583
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,8,128,1,float16,fp8,0,0.15636799732844034
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,2,128,1,float16,float16,0,0.1460533340771993
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,2,128,1,float16,fp8,0,0.14517333110173544
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,2,128,1,fp8,fp8,0,0.11547199885050456
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,4,128,1,float16,float16,0,0.15237866838773093
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,8,128,1,float16,float16,0,0.08409066994984944
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,4,128,1,float16,fp8,0,0.15122666954994202
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,8,4,128,1,fp8,fp8,0,0.1283680001894633
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,8,128,1,fp8,fp8,0,0.07302399973074596
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,1,128,1,float16,float16,0,0.07442666590213776
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,1,128,1,float16,fp8,0,0.07472533484299977
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,8,128,1,float16,fp8,0,0.08235733211040497
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,1,128,1,fp8,fp8,0,0.0621013343334198
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,2,128,1,float16,float16,0,0.07558399935563405
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,2,128,1,fp8,fp8,0,0.06568533182144165
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,2,128,1,float16,fp8,0,0.07575466732184093
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,4,128,1,float16,float16,0,0.07920533418655396
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,4,128,1,fp8,fp8,0,0.07036800185839336
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,8,4,128,1,float16,fp8,0,0.07869333525498708
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,8,128,1,float16,fp8,0,0.04539200166861216
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,1,128,1,float16,fp8,0,0.04189866781234741
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,8,128,1,float16,float16,0,0.046112000942230225
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,1,128,1,float16,float16,0,0.04218133290608724
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,8,128,1,fp8,fp8,0,0.04309333364168803
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,1,128,1,fp8,fp8,0,0.0371573343873024
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,2,128,1,float16,float16,0,0.04257600009441376
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,2,128,1,float16,fp8,0,0.04267199834187826
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,4,128,1,float16,float16,0,0.04398400088151296
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,2,128,1,fp8,fp8,0,0.03760000069936117
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,8,128,1,float16,float16,0,0.03266666581233343
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,4,128,1,fp8,fp8,0,0.04004266609748205
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,8,128,1,float16,fp8,0,0.03254933406909307
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,8,4,128,1,float16,fp8,0,0.04399466514587402
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,8,128,1,fp8,fp8,0,0.029167999823888142
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,1,128,1,float16,float16,0,0.031685332457224526
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,1,128,1,float16,fp8,0,0.03196800003449122
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,1,128,1,fp8,fp8,0,0.027274665733178455
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,2,128,1,float16,float16,0,0.03205333401759466
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,2,128,1,float16,fp8,0,0.03230399886767069
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,2,128,1,fp8,fp8,0,0.027829334139823914
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,4,128,1,fp8,fp8,0,0.028783999383449554
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,4,128,1,float16,fp8,0,0.033045334120591484
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,8,4,128,1,float16,float16,0,0.03254399945338567
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,1,128,1,float16,float16,0,0.02959999938805898
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,8,128,1,float16,float16,0,0.029125332832336426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,8,128,1,float16,fp8,0,0.029333333174387615
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,8,128,1,fp8,fp8,0,0.02499733368555705
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,1,128,1,float16,fp8,0,0.02958400050799052
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,1,128,1,fp8,fp8,0,0.024608001112937927
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,2,128,1,float16,float16,0,0.029258665939172108
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,2,128,1,fp8,fp8,0,0.025045332809289295
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,2,128,1,float16,fp8,0,0.029616000751654308
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,4,128,1,float16,fp8,0,0.029882666965325672
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,4,128,1,fp8,fp8,0,0.025370667378107708
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,8,4,128,1,float16,float16,0,0.029690665503342945
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,1,128,1,float16,float16,0,0.7650933265686035
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,1,128,1,fp8,fp8,0,0.6958879629770914
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,1,128,1,float16,fp8,0,0.762938658396403
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,2,128,1,float16,float16,0,0.7785867055257162
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,2,128,1,fp8,fp8,0,0.7410720189412435
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,2,128,1,float16,fp8,0,0.7751573721567789
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,4,128,1,float16,float16,0,0.8346347014109293
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,4,128,1,float16,fp8,0,0.8098186651865641
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,1,128,1,float16,float16,0,0.39100801944732666
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,1,128,1,float16,fp8,0,0.39133334159851074
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,8,128,1,float16,float16,0,0.46825067202250165
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,1,128,1,fp8,fp8,0,0.348527987798055
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,8,128,1,float16,fp8,0,0.44994131724039715
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,8,128,1,fp8,fp8,0,0.43033599853515625
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,2,128,1,float16,float16,0,0.4007306496302287
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,8,4,128,1,fp8,fp8,0,0.8253653049468994
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,2,128,1,float16,fp8,0,0.3958933353424072
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,2,128,1,fp8,fp8,0,0.37645868460337323
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,1,128,1,float16,float16,0,0.2050186594327291
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,8,128,1,float16,float16,0,0.24214933315912882
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,4,128,1,float16,float16,0,0.419487992922465
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,4,128,1,float16,fp8,0,0.41156800587972003
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,8,128,1,float16,fp8,0,0.23582400878270468
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,8,128,1,fp8,fp8,0,0.22233599424362183
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,1,128,1,float16,fp8,0,0.2055306633313497
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,8,4,128,1,fp8,fp8,0,0.421776016553243
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,1,128,1,fp8,fp8,0,0.1829493244489034
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,2,128,1,float16,float16,0,0.20888533194859824
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,2,128,1,float16,fp8,0,0.20866666237513223
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,2,128,1,fp8,fp8,0,0.19292799631754556
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,8,128,1,float16,float16,0,0.1321440041065216
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,4,128,1,fp8,fp8,0,0.2194826602935791
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,4,128,1,float16,float16,0,0.2200266718864441
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,8,4,128,1,float16,fp8,0,0.2169439991315206
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,8,128,1,float16,fp8,0,0.12779200077056885
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,1,128,1,float16,float16,0,0.11441066861152649
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,1,128,1,float16,fp8,0,0.11286933223406474
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,1,128,1,fp8,fp8,0,0.09514133135477702
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,8,128,1,fp8,fp8,0,0.11586133639017741
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,2,128,1,float16,float16,0,0.11572266618410747
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,2,128,1,float16,fp8,0,0.11475200454394023
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,4,128,1,float16,float16,0,0.12216533223787944
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,4,128,1,float16,fp8,0,0.12139200170834859
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,8,128,1,float16,float16,0,0.07136533161004384
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,2,128,1,fp8,fp8,0,0.09850666920344035
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,8,128,1,float16,fp8,0,0.06960533559322357
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,8,4,128,1,fp8,fp8,0,0.11001066366831462
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,1,128,1,float16,float16,0,0.061610668897628784
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,1,128,1,float16,fp8,0,0.061568001906077065
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,8,128,1,fp8,fp8,0,0.06517333288987477
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,1,128,1,fp8,fp8,0,0.054560000697771706
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,2,128,1,float16,float16,0,0.06270933151245117
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,2,128,1,float16,fp8,0,0.06299200157324474
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,2,128,1,fp8,fp8,0,0.05756799876689911
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,4,128,1,float16,float16,0,0.06698133548100789
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,4,128,1,float16,fp8,0,0.06613333523273468
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,8,128,1,float16,fp8,0,0.039306665460268654
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,8,4,128,1,fp8,fp8,0,0.06117333471775055
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,1,128,1,float16,float16,0,0.035829332967599235
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,1,128,1,float16,fp8,0,0.03591466695070267
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,8,128,1,float16,float16,0,0.03980266551176707
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,8,128,1,fp8,fp8,0,0.0378506655494372
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,1,128,1,fp8,fp8,0,0.032127998769283295
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,2,128,1,float16,float16,0,0.0365226666132609
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,2,128,1,float16,fp8,0,0.036415999134381614
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,4,128,1,float16,float16,0,0.0377866675456365
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,2,128,1,fp8,fp8,0,0.03303466737270355
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,4,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,8,128,1,float16,float16,0,0.02641066660483678
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,1,128,1,float16,float16,0,0.024986666937669117
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,1,128,1,float16,fp8,0,0.025216000775496166
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,1,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,8,128,1,float16,fp8,0,0.026378666361172993
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,8,128,1,fp8,fp8,0,0.02497066557407379
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,2,128,1,float16,float16,0,0.025706666211287182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,2,128,1,float16,fp8,0,0.025744001070658367
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,8,4,128,1,fp8,fp8,0,0.03572266548871994
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,2,128,1,fp8,fp8,0,0.023754666248957317
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,8,128,1,float16,fp8,0,0.023354666928450268
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,4,128,1,float16,fp8,0,0.02622399975856145
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,4,128,1,float16,float16,0,0.02622933437426885
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,8,128,1,float16,float16,0,0.023039999107519787
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,8,128,1,fp8,fp8,0,0.020736000190178554
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,8,4,128,1,fp8,fp8,0,0.02499733368555705
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,1,128,1,float16,float16,0,0.022597332795461018
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,1,128,1,float16,fp8,0,0.022661333282788593
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,1,128,1,fp8,fp8,0,0.020442667106787365
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,2,128,1,float16,fp8,0,0.02293333411216736
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,2,128,1,float16,float16,0,0.022634667654832203
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,2,128,1,fp8,fp8,0,0.020554666717847187
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,4,128,1,float16,float16,0,0.02309333284695943
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,4,128,1,float16,fp8,0,0.02327466756105423
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,8,128,1,float16,float16,0,0.02123733361562093
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,8,4,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,8,128,1,float16,fp8,0,0.02181333303451538
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,8,128,1,fp8,fp8,0,0.019546666493018467
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,1,128,1,float16,float16,0,0.021189334491888683
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,1,128,1,float16,fp8,0,0.021546666820844013
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,2,128,1,float16,float16,0,0.021695998807748158
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,1,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,4,128,1,float16,float16,0,0.021536000072956085
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,2,128,1,float16,fp8,0,0.02216533323129018
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,4,128,1,float16,fp8,0,0.021935999393463135
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,4,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,8,2,128,1,fp8,fp8,0,0.019498666127522785
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,1,128,1,float16,fp8,0,0.33322133620580036
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,1,128,1,fp8,fp8,0,0.2910240093866984
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,1,128,1,float16,float16,0,0.3333866596221924
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,2,128,1,float16,float16,0,0.34512531757354736
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,2,128,1,fp8,fp8,0,0.3199999928474426
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,2,128,1,float16,fp8,0,0.340554674466451
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,4,128,1,float16,float16,0,0.3649653196334839
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,4,128,1,float16,fp8,0,0.35893865426381427
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,1,128,1,float16,float16,0,0.17561066150665283
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,1,128,1,float16,fp8,0,0.17609065771102905
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,8,128,1,float16,float16,0,0.2148639957110087
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,8,128,1,float16,fp8,0,0.20843732357025146
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,8,4,128,1,fp8,fp8,0,0.3703306516011556
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,1,128,1,fp8,fp8,0,0.1544533371925354
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,8,128,1,fp8,fp8,0,0.19629865884780884
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,2,128,1,float16,fp8,0,0.17835734287897745
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,2,128,1,float16,float16,0,0.1809813380241394
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,2,128,1,fp8,fp8,0,0.1653439998626709
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,4,128,1,float16,float16,0,0.19086400667826334
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,4,128,1,float16,fp8,0,0.19037866592407227
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,8,128,1,float16,float16,0,0.11717866857846577
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,8,4,128,1,fp8,fp8,0,0.1937333345413208
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,8,128,1,float16,fp8,0,0.11436266700426738
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,1,128,1,float16,fp8,0,0.09773332873980205
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,1,128,1,float16,float16,0,0.09799999992052714
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,8,128,1,fp8,fp8,0,0.10711999734242757
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,1,128,1,fp8,fp8,0,0.08639466762542725
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,2,128,1,float16,fp8,0,0.10005866487820943
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,2,128,1,float16,float16,0,0.10115200281143188
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,2,128,1,fp8,fp8,0,0.0897226631641388
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,4,128,1,float16,float16,0,0.10753066341082256
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,4,128,1,float16,fp8,0,0.10556800166765849
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,8,128,1,float16,float16,0,0.0682239979505539
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,1,128,1,float16,float16,0,0.05477866530418396
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,8,4,128,1,fp8,fp8,0,0.10233599940935771
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,8,128,1,float16,fp8,0,0.06685333450635274
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,1,128,1,float16,fp8,0,0.05504000186920166
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,8,128,1,fp8,fp8,0,0.060453335444132485
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,1,128,1,fp8,fp8,0,0.04925866425037384
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,2,128,1,float16,float16,0,0.056474665800730385
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,2,128,1,float16,fp8,0,0.05658133327960968
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,2,128,1,fp8,fp8,0,0.05102399984995524
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,4,128,1,float16,fp8,0,0.05959466596444448
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,8,128,1,float16,float16,0,0.03673599908749262
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,4,128,1,fp8,fp8,0,0.05640000104904175
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,8,128,1,fp8,fp8,0,0.03591466695070267
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,1,128,1,float16,float16,0,0.03295466552178065
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,8,128,1,float16,fp8,0,0.03601066768169403
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,8,4,128,1,float16,float16,0,0.06053866446018219
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,1,128,1,float16,fp8,0,0.0330079992612203
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,1,128,1,fp8,fp8,0,0.030320001145203907
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,2,128,1,float16,fp8,0,0.033626665671666466
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,2,128,1,float16,float16,0,0.03377600014209747
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,4,128,1,float16,float16,0,0.03510399907827377
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,2,128,1,fp8,fp8,0,0.030997333427270252
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,4,128,1,float16,fp8,0,0.0351200004418691
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,8,128,1,float16,float16,0,0.02386666586001714
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,8,4,128,1,fp8,fp8,0,0.03370666752258936
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,8,128,1,float16,fp8,0,0.023962666591008503
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,8,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,1,128,1,float16,fp8,0,0.02271999915440877
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,1,128,1,float16,float16,0,0.022570667167504627
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,1,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,2,128,1,float16,float16,0,0.022976001103719074
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,2,128,1,float16,fp8,0,0.02274666726589203
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,4,128,1,float16,fp8,0,0.0236160010099411
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,4,128,1,fp8,fp8,0,0.022805333137512207
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,4,128,1,float16,float16,0,0.023445333043734234
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,8,2,128,1,fp8,fp8,0,0.02162666618824005
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,8,128,1,float16,float16,0,0.02032533288002014
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,8,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,8,128,1,float16,fp8,0,0.020495999604463577
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,1,128,1,float16,float16,0,0.019007999449968338
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,1,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,1,128,1,fp8,fp8,0,0.01838933303952217
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,2,128,1,float16,float16,0,0.019296000401178997
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,4,128,1,float16,float16,0,0.019632000476121902
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,2,128,1,fp8,fp8,0,0.018453333526849747
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,4,128,1,float16,fp8,0,0.019765333582957584
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,2,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,8,4,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,8,128,1,float16,fp8,0,0.018917333334684372
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,8,128,1,float16,float16,0,0.018218666315078735
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,1,128,1,float16,float16,0,0.01801066721479098
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,8,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,1,128,1,float16,fp8,0,0.01815466706951459
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,1,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,2,128,1,float16,float16,0,0.018053332964579265
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,2,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,4,128,1,float16,fp8,0,0.018426666657129925
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,4,128,1,float16,float16,0,0.018288000176350277
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,2,128,1,float16,fp8,0,0.018266666680574417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,8,4,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,8,128,1,float16,float16,0,0.017386666188637417
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,8,128,1,float16,fp8,0,0.01791999985774358
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,1,128,1,float16,float16,0,0.01758933315674464
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,1,128,1,float16,fp8,0,0.018122666825850803
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,2,128,1,float16,float16,0,0.017658667018016178
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,1,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,2,128,1,fp8,fp8,0,0.016783999900023144
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,2,128,1,float16,fp8,0,0.017845333864291508
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,4,128,1,float16,float16,0,0.017514667163292568
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,8,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,4,128,1,float16,fp8,0,0.01826133330663045
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,8,4,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,1,128,1,float16,float16,0,0.16711467504501343
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,1,128,1,float16,fp8,0,0.1662453313668569
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,2,128,1,float16,float16,0,0.17005866765975952
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,2,128,1,float16,fp8,0,0.16915200153986612
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,1,128,1,fp8,fp8,0,0.15336533387502035
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,2,128,1,fp8,fp8,0,0.16474666198094687
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,4,128,1,float16,fp8,0,0.17991999785105386
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,4,128,1,float16,float16,0,0.18525866667429605
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,8,128,1,float16,float16,0,0.11921067039171855
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,8,128,1,float16,fp8,0,0.11593066652615865
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,1,128,1,float16,float16,0,0.09250133236249287
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,8,128,1,fp8,fp8,0,0.10526399811108907
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,8,4,128,1,fp8,fp8,0,0.19150400161743164
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,1,128,1,fp8,fp8,0,0.08560533324877422
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,1,128,1,float16,fp8,0,0.17045867443084717
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,2,128,1,float16,float16,0,0.09703999757766724
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,2,128,1,float16,fp8,0,0.09565333525339763
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,2,128,1,fp8,fp8,0,0.08937066793441772
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,4,128,1,float16,float16,0,0.10402666529019673
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,8,128,1,float16,float16,0,0.06451733410358429
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,4,128,1,float16,fp8,0,0.10096533099810283
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,8,128,1,float16,fp8,0,0.0636053333679835
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,8,4,128,1,fp8,fp8,0,0.10032000144322713
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,8,128,1,fp8,fp8,0,0.05917333563168844
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,1,128,1,float16,float16,0,0.051967998345692955
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,1,128,1,float16,fp8,0,0.052426666021347046
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,1,128,1,fp8,fp8,0,0.04906133313973745
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,2,128,1,float16,float16,0,0.05392000079154968
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,2,128,1,float16,fp8,0,0.053743998209635414
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,2,128,1,fp8,fp8,0,0.051407997806866966
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,4,128,1,float16,float16,0,0.05836800237496694
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,4,128,1,float16,fp8,0,0.05676800012588501
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,8,128,1,float16,float16,0,0.035642666121323906
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,8,4,128,1,fp8,fp8,0,0.05606399973233541
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,1,128,1,float16,float16,0,0.0315733328461647
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,8,128,1,fp8,fp8,0,0.03571200122435888
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,8,128,1,float16,fp8,0,0.03474666674931844
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,1,128,1,fp8,fp8,0,0.03047466774781545
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,1,128,1,float16,fp8,0,0.03169066707293192
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,2,128,1,float16,float16,0,0.032314665615558624
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,2,128,1,float16,fp8,0,0.03235200047492981
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,2,128,1,fp8,fp8,0,0.0308693324526151
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,4,128,1,float16,fp8,0,0.03369600077470144
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,4,128,1,float16,float16,0,0.03336533407370249
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,8,128,1,float16,float16,0,0.023354666928450268
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,8,4,128,1,fp8,fp8,0,0.03294399877389272
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,8,128,1,float16,fp8,0,0.023157333334287006
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,1,128,1,float16,float16,0,0.021674667795499165
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,8,128,1,fp8,fp8,0,0.022976001103719074
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,1,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,2,128,1,float16,fp8,0,0.022197333474953968
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,1,128,1,float16,fp8,0,0.021712000171343487
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,2,128,1,float16,float16,0,0.021856000026067097
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,2,128,1,fp8,fp8,0,0.021498667697111767
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,4,128,1,float16,float16,0,0.022543999056021374
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,8,128,1,float16,fp8,0,0.01855466639002164
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,4,128,1,float16,fp8,0,0.022853332261244457
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,8,128,1,float16,float16,0,0.01858666663368543
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,1,128,1,float16,float16,0,0.017871999492247898
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,8,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,1,128,1,float16,fp8,0,0.018063999712467194
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,2,128,1,float16,float16,0,0.01830400029818217
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,8,4,128,1,fp8,fp8,0,0.02258666604757309
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,1,128,1,fp8,fp8,0,0.018325333793958027
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,2,128,1,float16,fp8,0,0.018218666315078735
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,2,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,4,128,1,float16,float16,0,0.018298666924238205
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,4,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,8,4,128,1,float16,fp8,0,0.018543999642133713
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,8,128,1,float16,float16,0,0.01703466723362605
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,8,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,1,128,1,float16,float16,0,0.016586666305859882
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,1,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,8,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,1,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,2,128,1,float16,float16,0,0.016864000509182613
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,2,128,1,fp8,fp8,0,0.017397332936525345
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,4,128,1,float16,float16,0,0.016858667135238647
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,4,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,8,4,128,1,fp8,fp8,0,0.017386666188637417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,8,128,1,float16,float16,0,0.01599466676513354
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,8,128,1,float16,fp8,0,0.01658133293191592
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,8,128,1,fp8,fp8,0,0.016384000579516094
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,1,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,1,128,1,float16,float16,0,0.016293333222468693
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,2,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,1,128,1,fp8,fp8,0,0.01664000004529953
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,2,128,1,float16,float16,0,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,2,128,1,fp8,fp8,0,0.016757333030303318
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,4,128,1,float16,float16,0,0.01632533346613248
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,8,128,1,float16,fp8,0,0.016154666741689045
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,4,128,1,float16,fp8,0,0.016805333395799
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,8,128,1,float16,float16,0,0.01552533358335495
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,8,4,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,8,128,1,fp8,fp8,0,0.016021333634853363
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,1,128,1,float16,float16,0,0.01586666703224182
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,1,128,1,fp8,fp8,0,0.01639466608564059
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,1,128,1,float16,fp8,0,0.016149333367745083
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,2,128,1,float16,float16,0,0.01587733378012975
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,2,128,1,fp8,fp8,0,0.016271999726692837
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,2,128,1,float16,fp8,0,0.016544000556071598
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,4,128,1,float16,fp8,0,0.016224000602960587
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,4,128,1,float16,float16,0,0.016058667252461117
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,8,4,128,1,fp8,fp8,0,0.0162773331006368
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,1,128,1,float16,float16,0,0.10801600416501363
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,1,128,1,fp8,fp8,0,0.11281067132949829
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,2,128,1,float16,float16,0,0.11036266883214314
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,2,128,1,float16,fp8,0,0.10899200042088826
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,1,128,1,float16,fp8,0,0.10778133074442546
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,2,128,1,fp8,fp8,0,0.11723732948303223
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,4,128,1,float16,float16,0,0.11918399731318156
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,4,128,1,float16,fp8,0,0.11622400085131328
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,8,128,1,float16,float16,0,0.06660800178845723
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,8,4,128,1,fp8,fp8,0,0.12718933820724487
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,1,128,1,float16,fp8,0,0.0591893345117569
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,8,128,1,float16,fp8,0,0.06634133557478587
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,1,128,1,fp8,fp8,0,0.06279466549555461
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,8,128,1,fp8,fp8,0,0.07274666428565979
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,1,128,1,float16,float16,0,0.059562668204307556
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,2,128,1,float16,float16,0,0.06132799883683523
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,2,128,1,float16,fp8,0,0.060821334520975746
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,2,128,1,fp8,fp8,0,0.06524799764156342
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,4,128,1,fp8,fp8,0,0.07006399830182393
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,4,128,1,float16,float16,0,0.0646613339583079
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,8,128,1,float16,float16,0,0.03699733316898346
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,8,4,128,1,float16,fp8,0,0.06407466530799866
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,1,128,1,float16,float16,0,0.03493333359559377
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,8,128,1,float16,fp8,0,0.03623999903599421
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,8,128,1,fp8,fp8,0,0.04242666562398275
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,1,128,1,float16,fp8,0,0.03500800083080927
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,1,128,1,fp8,fp8,0,0.03721066564321518
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,2,128,1,float16,float16,0,0.035487999518712364
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,2,128,1,float16,fp8,0,0.035445332527160645
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,2,128,1,fp8,fp8,0,0.03826133410135905
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,4,128,1,float16,float16,0,0.03700799991687139
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,4,128,1,fp8,fp8,0,0.04045333216587702
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,8,128,1,float16,fp8,0,0.024192000428835552
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,8,128,1,float16,float16,0,0.024170666933059692
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,8,4,128,1,float16,fp8,0,0.03664000084002813
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,1,128,1,float16,float16,0,0.023130667706330616
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,8,128,1,fp8,fp8,0,0.02605866640806198
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,1,128,1,float16,fp8,0,0.023578666150569916
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,1,128,1,fp8,fp8,0,0.0249439999461174
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,2,128,1,float16,fp8,0,0.023887999355793
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,2,128,1,float16,float16,0,0.023669332265853882
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,2,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,4,128,1,float16,fp8,0,0.024453334510326385
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,8,128,1,float16,float16,0,0.01823466643691063
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,4,128,1,fp8,fp8,0,0.025994665920734406
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,8,4,128,1,float16,float16,0,0.024373332659403484
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,8,128,1,float16,fp8,0,0.018346666047970455
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,8,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,1,128,1,float16,float16,0,0.01718933383623759
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,1,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,1,128,1,fp8,fp8,0,0.018138666947682697
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,2,128,1,float16,float16,0,0.017504000415404636
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,4,128,1,float16,float16,0,0.017727999637524288
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,4,128,1,float16,fp8,0,0.017786666750907898
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,2,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,8,128,1,float16,float16,0,0.015856000284353893
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,2,128,1,fp8,fp8,0,0.018581333259741466
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,8,128,1,float16,fp8,0,0.016074666132529575
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,8,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,8,4,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,1,128,1,float16,float16,0,0.015658666690190632
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,2,128,1,float16,float16,0,0.015925332903862
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,1,128,1,float16,fp8,0,0.016165333489576977
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,4,128,1,float16,float16,0,0.01584533353646596
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,1,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,2,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,2,128,1,float16,fp8,0,0.01635733370979627
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,4,128,1,float16,fp8,0,0.01613333324591319
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,8,4,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,8,128,1,fp8,fp8,0,0.0164533331990242
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,8,128,1,float16,float16,0,0.014997333288192749
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,8,128,1,float16,fp8,0,0.01553600033124288
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,1,128,1,float16,fp8,0,0.015765332927306492
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,1,128,1,float16,float16,0,0.014917333920796713
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,2,128,1,float16,float16,0,0.015168000012636185
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,2,128,1,float16,fp8,0,0.01589866727590561
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,1,128,1,fp8,fp8,0,0.016458666572968166
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,2,128,1,fp8,fp8,0,0.0163680004576842
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,4,128,1,float16,float16,0,0.015253332753976187
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,4,128,1,fp8,fp8,0,0.016330666840076447
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,8,4,128,1,float16,fp8,0,0.015834666788578033
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,8,128,1,float16,float16,0,0.014325333138306936
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,8,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,1,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,8,128,1,fp8,fp8,0,0.015749332805474598
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,1,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,2,128,1,float16,float16,0,0.01471466695268949
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,2,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,2,128,1,fp8,fp8,0,0.015978666643301647
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,1,128,1,float16,float16,0,0.014645333091417948
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,4,128,1,float16,float16,0,0.014949332922697067
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,4,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,8,128,1,float16,float16,0,0.014015999933083853
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,8,128,1,float16,fp8,0,0.014592000593741735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,8,128,1,fp8,fp8,0,0.015562667200962702
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,1,128,1,float16,float16,0,0.014325333138306936
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,8,4,128,1,fp8,fp8,0,0.016058667252461117
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,2,128,1,float16,float16,0,0.014581333845853806
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,1,128,1,fp8,fp8,0,0.015856000284353893
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,4,128,1,float16,float16,0,0.01469333345691363
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,2,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,1,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,2,128,1,fp8,fp8,0,0.015743999431530636
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,4,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,8,4,128,1,fp8,fp8,0,0.015856000284353893
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,1,128,1,float16,float16,0,0.07565866907437642
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,1,128,1,float16,fp8,0,0.07629333436489105
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,1,128,1,fp8,fp8,0,0.09118400017420451
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,2,128,1,float16,float16,0,0.07852266728878021
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,2,128,1,fp8,fp8,0,0.09388267000516255
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,2,128,1,float16,fp8,0,0.07798933486143748
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,4,128,1,float16,float16,0,0.08135466774304707
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,4,128,1,float16,fp8,0,0.07998933394749959
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,8,128,1,float16,float16,0,0.0443146675825119
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,8,4,128,1,fp8,fp8,0,0.09814400474230449
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,8,128,1,float16,fp8,0,0.043663998444875084
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,1,128,1,float16,float16,0,0.04302933315436045
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,8,128,1,fp8,fp8,0,0.05669866502285004
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,1,128,1,fp8,fp8,0,0.050623998045921326
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,2,128,1,float16,float16,0,0.043925335009892784
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,2,128,1,float16,fp8,0,0.043280000487963356
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,2,128,1,fp8,fp8,0,0.0521066685517629
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,1,128,1,float16,fp8,0,0.04372799893220266
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,4,128,1,float16,fp8,0,0.044810667634010315
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,4,128,1,fp8,fp8,0,0.05410666763782501
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,8,4,128,1,float16,float16,0,0.045781334241231285
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,8,128,1,float16,float16,0,0.02700799951950709
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,8,128,1,float16,fp8,0,0.027045334378878277
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,1,128,1,float16,float16,0,0.02699733277161916
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,8,128,1,fp8,fp8,0,0.033189333975315094
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,1,128,1,fp8,fp8,0,0.03156800071398417
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,2,128,1,float16,float16,0,0.02740799884001414
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,2,128,1,float16,fp8,0,0.027744000156720478
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,1,128,1,float16,fp8,0,0.027263998985290527
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,4,128,1,float16,float16,0,0.028138667345046997
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,2,128,1,fp8,fp8,0,0.03190933416287104
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,4,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,8,4,128,1,float16,fp8,0,0.028058665494124096
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,8,128,1,float16,float16,0,0.019402666638294857
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,8,128,1,fp8,fp8,0,0.022570667167504627
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,1,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,8,128,1,float16,fp8,0,0.01952533299724261
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,2,128,1,float16,float16,0,0.01926933353145917
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,1,128,1,fp8,fp8,0,0.02163200080394745
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,2,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,1,128,1,float16,float16,0,0.019088000059127808
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,2,128,1,fp8,fp8,0,0.02195200075705846
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,4,128,1,float16,float16,0,0.019573333362738293
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,4,128,1,float16,fp8,0,0.019472000499566395
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,8,128,1,float16,float16,0,0.015594666202863058
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,8,128,1,float16,fp8,0,0.015935999651749928
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,8,128,1,fp8,fp8,0,0.017802666872739792
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,1,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,1,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,8,4,128,1,fp8,fp8,0,0.02403733382622401
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,1,128,1,float16,float16,0,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,2,128,1,float16,fp8,0,0.015546667079130808
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,2,128,1,float16,float16,0,0.014896000425020853
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,2,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,4,128,1,float16,float16,0,0.015423999478419622
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,4,128,1,float16,fp8,0,0.015637333194414776
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,8,4,128,1,fp8,fp8,0,0.017504000415404636
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,8,128,1,float16,float16,0,0.014698666830857595
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,8,128,1,fp8,fp8,0,0.016544000556071598
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,1,128,1,float16,float16,0,0.014426667243242264
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,1,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,2,128,1,float16,float16,0,0.014544000228246054
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,1,128,1,fp8,fp8,0,0.01648533344268799
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,8,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,2,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,4,128,1,float16,float16,0,0.014389333625634512
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,2,128,1,fp8,fp8,0,0.016522667060295742
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,4,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,8,4,128,1,fp8,fp8,0,0.016437333077192307
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,8,128,1,float16,float16,0,0.013829333086808523
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,8,128,1,float16,fp8,0,0.01434133326013883
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,1,128,1,float16,float16,0,0.014202666779359182
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,1,128,1,float16,fp8,0,0.014592000593741735
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,1,128,1,fp8,fp8,0,0.01600533351302147
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,2,128,1,float16,float16,0,0.014090667168299357
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,2,128,1,float16,fp8,0,0.014645333091417948
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,2,128,1,fp8,fp8,0,0.016021333634853363
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,4,128,1,float16,float16,0,0.014080000420411428
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,8,128,1,fp8,fp8,0,0.01597333326935768
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,4,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,8,4,128,1,fp8,fp8,0,0.01617066686352094
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,8,128,1,float16,float16,0,0.013770667215188345
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,1,128,1,float16,float16,0,0.013888000200192133
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,8,128,1,float16,fp8,0,0.01422400027513504
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,1,128,1,float16,fp8,0,0.014373333503802618
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,8,128,1,fp8,fp8,0,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,2,128,1,float16,float16,0,0.013994666437307993
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,1,128,1,fp8,fp8,0,0.01575999955336253
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,2,128,1,float16,fp8,0,0.014463999619086584
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,2,128,1,fp8,fp8,0,0.016063999384641647
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,4,128,1,float16,float16,0,0.013845333208640417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,4,128,1,float16,fp8,0,0.014650666465361914
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,8,4,128,1,fp8,fp8,0,0.01597333326935768
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,8,128,1,float16,float16,0,0.013183999806642532
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,8,128,1,float16,fp8,0,0.01393066719174385
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,1,128,1,float16,float16,0,0.013728000223636627
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,1,128,1,fp8,fp8,0,0.015669333438078564
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,1,128,1,float16,fp8,0,0.014389333625634512
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,2,128,1,float16,fp8,0,0.014117332796255747
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,2,128,1,float16,float16,0,0.013797332843144735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,2,128,1,fp8,fp8,0,0.01568000018596649
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,8,128,1,fp8,fp8,0,0.015498666713635126
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,4,128,1,float16,float16,0,0.013797332843144735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,4,128,1,float16,fp8,0,0.014175999909639359
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,8,4,128,1,fp8,fp8,0,0.01570133368174235
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,1,128,1,float16,float16,0,0.06643733382225037
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,1,128,1,float16,fp8,0,0.06614933411280315
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,1,128,1,fp8,fp8,0,0.07875733574231465
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,2,128,1,fp8,fp8,0,0.07969599962234497
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,2,128,1,float16,float16,0,0.0664213349421819
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,2,128,1,float16,fp8,0,0.0668693333864212
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,4,128,1,float16,float16,0,0.06783999999364217
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,4,128,1,float16,fp8,0,0.06769066552321117
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,8,128,1,float16,float16,0,0.03603733330965042
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,8,128,1,float16,fp8,0,0.035690667728583016
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,8,4,128,1,fp8,fp8,0,0.08220799763997395
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,8,128,1,fp8,fp8,0,0.04706133405367533
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,1,128,1,float16,float16,0,0.038719999293486275
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,1,128,1,float16,fp8,0,0.0390133336186409
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,1,128,1,fp8,fp8,0,0.04603200157483419
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,2,128,1,float16,fp8,0,0.03934400031963984
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,2,128,1,float16,float16,0,0.03907199949026108
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,2,128,1,fp8,fp8,0,0.04622933268547058
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,4,128,1,float16,fp8,0,0.0397173340121905
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,4,128,1,float16,float16,0,0.0395359992980957
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,8,4,128,1,fp8,fp8,0,0.047338664531707764
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,8,128,1,float16,fp8,0,0.023946667710940044
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,8,128,1,fp8,fp8,0,0.029802667597929638
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,1,128,1,float16,fp8,0,0.025242666403452556
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,8,128,1,float16,float16,0,0.02401600033044815
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,1,128,1,fp8,fp8,0,0.02902399996916453
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,1,128,1,float16,float16,0,0.02481066683928172
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,2,128,1,float16,float16,0,0.0249493345618248
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,2,128,1,float16,fp8,0,0.025066666305065155
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,4,128,1,float16,float16,0,0.025226667523384094
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,2,128,1,fp8,fp8,0,0.02942933390537898
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,4,128,1,float16,fp8,0,0.025749333202838898
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,8,128,1,float16,float16,0,0.018218666315078735
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,8,4,128,1,fp8,fp8,0,0.030048000315825146
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,1,128,1,float16,float16,0,0.017690667261679966
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,8,128,1,fp8,fp8,0,0.02085866779088974
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,8,128,1,float16,fp8,0,0.01823466643691063
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,1,128,1,float16,fp8,0,0.018229333062966663
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,2,128,1,float16,float16,0,0.017994667092959087
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,1,128,1,fp8,fp8,0,0.020618667205174763
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,2,128,1,float16,fp8,0,0.01828266680240631
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,4,128,1,float16,float16,0,0.017829333742459614
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,2,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,4,128,1,float16,fp8,0,0.018218666315078735
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,8,4,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,8,128,1,float16,float16,0,0.015040000279744467
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,8,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,1,128,1,float16,float16,0,0.014762666076421738
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,8,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,1,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,2,128,1,float16,float16,0,0.014805333067973455
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,1,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,2,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,4,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,4,128,1,float16,float16,0,0.01505600040157636
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,2,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,8,4,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,8,128,1,float16,float16,0,0.014053333550691605
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,8,128,1,float16,fp8,0,0.014698666830857595
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,1,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,1,128,1,float16,float16,0,0.014149333039919535
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,2,128,1,float16,float16,0,0.014175999909639359
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,8,128,1,fp8,fp8,0,0.016517333686351776
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,2,128,1,float16,fp8,0,0.014645333091417948
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,2,128,1,fp8,fp8,0,0.016352000335852306
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,4,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,4,128,1,float16,float16,0,0.014186666657527288
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,1,128,1,fp8,fp8,0,0.01629866659641266
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,8,128,1,float16,float16,0,0.013776000589132309
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,8,4,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,8,128,1,float16,fp8,0,0.014384000251690546
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,1,128,1,float16,fp8,0,0.014629332969586054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,1,128,1,float16,float16,0,0.013823999712864557
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,8,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,1,128,1,fp8,fp8,0,0.01609066625436147
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,2,128,1,float16,float16,0,0.01392000044385592
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,2,128,1,float16,fp8,0,0.014666666587193808
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,2,128,1,fp8,fp8,0,0.01600533351302147
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,4,128,1,float16,fp8,0,0.014629332969586054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,4,128,1,float16,float16,0,0.013957332819700241
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,8,4,128,1,fp8,fp8,0,0.016224000602960587
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,8,128,1,float16,float16,0,0.013445333888133367
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,8,128,1,float16,fp8,0,0.014138666292031607
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,1,128,1,float16,float16,0,0.013882666826248169
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,8,128,1,fp8,fp8,0,0.01568000018596649
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,1,128,1,float16,fp8,0,0.014426667243242264
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,1,128,1,fp8,fp8,0,0.015989333391189575
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,2,128,1,float16,fp8,0,0.014453332871198654
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,2,128,1,fp8,fp8,0,0.015893333901961643
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,2,128,1,float16,float16,0,0.013781332721312841
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,4,128,1,float16,fp8,0,0.01452800010641416
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,4,128,1,float16,float16,0,0.013872000078360239
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,8,4,128,1,fp8,fp8,0,0.015829333414634068
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,8,128,1,float16,fp8,0,0.013653332988421122
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,8,128,1,float16,float16,0,0.013056000073750814
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,8,128,1,fp8,fp8,0,0.01545599972208341
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,1,128,1,fp8,fp8,0,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,1,128,1,float16,float16,0,0.013616000612576803
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,1,128,1,float16,fp8,0,0.014149333039919535
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,2,128,1,float16,float16,0,0.013546666751305262
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,2,128,1,float16,fp8,0,0.014149333039919535
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,2,128,1,fp8,fp8,0,0.01571200042963028
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,4,128,1,float16,float16,0,0.013610667238632837
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,4,128,1,fp8,fp8,0,0.015728000551462173
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,8,4,128,1,float16,fp8,0,0.014117332796255747
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,4,1,128,1,fp8,fp8,0,2.6027466456095376
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,4,1,128,1,float16,fp8,0,3.9250081380208335
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,4,128,1,float16,float16,0,1.9774719874064128
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,4,1,128,1,float16,float16,0,3.9239253997802734
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,4,2,128,1,float16,float16,0,3.9107840855916343
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,4,128,1,float16,fp8,0,1.9721387227376301
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,4,2,128,1,float16,fp8,0,3.9954827626546225
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16384,4,2,128,1,fp8,fp8,0,2.6147146224975586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,1,128,1,float16,float16,0,2.048901398976644
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,1,128,1,float16,fp8,0,2.019983927408854
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,1,128,1,fp8,fp8,0,1.3622399965922039
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,4,128,1,fp8,fp8,0,1.3404107093811035
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,4,128,1,float16,float16,0,1.0562773545583088
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,2,128,1,float16,float16,0,2.0308213233947754
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,2,128,1,float16,fp8,0,2.027498722076416
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16384,4,2,128,1,fp8,fp8,0,1.3683093388875325
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,1,128,1,float16,float16,0,1.1012852986653645
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,4,128,1,fp8,fp8,0,0.6545120080312093
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,4,128,1,float16,fp8,0,1.0717226664225261
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,2,128,1,float16,float16,0,1.0985120137532551
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,1,128,1,float16,fp8,0,1.0968906879425049
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,2,128,1,fp8,fp8,0,0.678885300954183
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,1,128,1,fp8,fp8,0,0.6734933058420817
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16384,4,2,128,1,float16,fp8,0,1.0951626300811768
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,4,128,1,float16,float16,0,0.4977866808573405
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,1,128,1,float16,float16,0,0.5048373142878214
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,1,128,1,float16,fp8,0,0.5034026702245077
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,4,128,1,float16,fp8,0,0.4904640118281047
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,2,128,1,float16,float16,0,0.5055946509043375
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,2,128,1,float16,fp8,0,0.5019359985987345
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,1,128,1,fp8,fp8,0,0.35806934038798016
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,2,128,1,fp8,fp8,0,0.36522666613260907
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16384,4,4,128,1,fp8,fp8,0,0.34833065668741864
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,4,1,128,1,float16,fp8,0,2.3110453287760415
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,4,1,128,1,float16,float16,0,2.3161333401997886
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,4,1,128,1,fp8,fp8,0,1.550831953684489
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,4,2,128,1,float16,float16,0,2.2882986068725586
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,4,128,1,float16,float16,0,1.192021369934082
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,4,2,128,1,float16,fp8,0,2.294384002685547
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,4,128,1,float16,fp8,0,1.1912000179290771
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,1,128,1,float16,float16,0,1.211290677388509
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,1,128,1,float16,fp8,0,1.2078453699747722
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,12288,4,2,128,1,fp8,fp8,0,1.564842700958252
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,4,128,1,fp8,fp8,0,0.7767573197682699
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,1,128,1,fp8,fp8,0,0.7745013236999512
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,2,128,1,float16,float16,0,1.2232960065205891
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,2,128,1,float16,fp8,0,1.2103839715321858
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,12288,4,2,128,1,fp8,fp8,0,0.7916959921518961
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,4,128,1,float16,float16,0,0.5645066499710083
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,4,128,1,float16,fp8,0,0.5645813147226969
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,1,128,1,float16,float16,0,0.575760006904602
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,4,128,1,fp8,fp8,0,0.39910932381947833
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,1,128,1,float16,fp8,0,0.5665920178095499
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,1,128,1,fp8,fp8,0,0.4099946816762288
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,2,128,1,float16,float16,0,0.5751253366470337
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,4,128,1,float16,float16,0,0.31958399216334027
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,2,128,1,fp8,fp8,0,0.4124586582183838
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,4,128,1,float16,fp8,0,0.31885333855946857
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,12288,4,2,128,1,float16,fp8,0,0.5747573375701904
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,1,128,1,float16,float16,0,0.32686400413513184
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,4,128,1,fp8,fp8,0,0.2302079995473226
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,1,128,1,float16,fp8,0,0.3257066607475281
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,1,128,1,fp8,fp8,0,0.2333973248799642
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,2,128,1,float16,float16,0,0.33163734277089435
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,2,128,1,fp8,fp8,0,0.2381813327471415
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,12288,4,2,128,1,float16,fp8,0,0.32758933305740356
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,4,1,128,1,float16,fp8,0,1.6276319821675618
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,4,1,128,1,fp8,fp8,0,1.1305546760559082
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,4,1,128,1,float16,float16,0,1.6672533353169758
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,4,128,1,float16,float16,0,0.8685226440429688
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,4,2,128,1,float16,fp8,0,1.6503732999165852
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,4,128,1,float16,fp8,0,0.8586986859639486
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,4,2,128,1,fp8,fp8,0,1.1459946632385254
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,10240,4,2,128,1,float16,float16,0,1.6692479451497395
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,1,128,1,float16,float16,0,0.8801706631978353
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,4,128,1,fp8,fp8,0,0.560917337735494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,1,128,1,fp8,fp8,0,0.566373348236084
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,1,128,1,float16,fp8,0,0.8806613286336263
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,4,128,1,float16,float16,0,0.41738665103912354
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,2,128,1,float16,float16,0,0.8839999834696451
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,2,128,1,fp8,fp8,0,0.5761386553446451
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,10240,4,2,128,1,float16,fp8,0,0.8786133130391439
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,1,128,1,float16,float16,0,0.42525867621103924
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,4,128,1,float16,fp8,0,0.41178667545318604
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,4,128,1,fp8,fp8,0,0.29983999331792194
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,1,128,1,fp8,fp8,0,0.3064906597137451
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,1,128,1,float16,fp8,0,0.42019200325012207
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,2,128,1,float16,fp8,0,0.4247359832127889
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,2,128,1,float16,float16,0,0.4248533248901367
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,10240,4,2,128,1,fp8,fp8,0,0.30802667140960693
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,4,128,1,float16,float16,0,0.2348533272743225
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,4,128,1,fp8,fp8,0,0.17301867405573526
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,1,128,1,float16,float16,0,0.2392266591389974
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,1,128,1,fp8,fp8,0,0.1721973419189453
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,4,128,1,float16,fp8,0,0.2317919929822286
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,1,128,1,float16,fp8,0,0.23862399657567343
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,2,128,1,float16,float16,0,0.23983466625213623
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,2,128,1,float16,fp8,0,0.23878933986028036
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,10240,4,2,128,1,fp8,fp8,0,0.17529600858688354
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,4,1,128,1,float16,float16,0,2.1215786933898926
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,4,1,128,1,float16,fp8,0,2.131077289581299
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,4,1,128,1,fp8,fp8,0,1.478864034016927
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,4,128,1,float16,float16,0,1.0924746990203857
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,4,128,1,float16,fp8,0,1.1001066366831462
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,4,2,128,1,fp8,fp8,0,1.4956372578938801
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,4,2,128,1,float16,float16,0,2.143967946370443
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,8192,4,2,128,1,float16,fp8,0,2.1545653343200684
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,1,128,1,float16,float16,0,1.1195039749145508
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,1,128,1,float16,fp8,0,1.1064480145772297
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,1,128,1,fp8,fp8,0,0.7767679691314697
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,4,128,1,fp8,fp8,0,0.771941343943278
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,2,128,1,float16,fp8,0,1.1116426785786946
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,2,128,1,float16,float16,0,1.1278026898701985
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,8192,4,2,128,1,fp8,fp8,0,0.7857279777526855
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,1,128,1,float16,float16,0,0.6045440038045248
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,4,128,1,float16,float16,0,0.5954293409983317
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,4,128,1,float16,fp8,0,0.5932426850001017
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,1,128,1,float16,fp8,0,0.5975786844889323
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,4,128,1,fp8,fp8,0,0.3914346694946289
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,1,128,1,fp8,fp8,0,0.39021865526835126
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,2,128,1,float16,float16,0,0.6098133325576782
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,2,128,1,float16,fp8,0,0.6010933319727579
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,8192,4,2,128,1,fp8,fp8,0,0.3979946772257487
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,1,128,1,float16,float16,0,0.2821706732114156
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,4,128,1,float16,fp8,0,0.2786933382352193
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,4,128,1,fp8,fp8,0,0.20852800210316977
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,1,128,1,fp8,fp8,0,0.21024000644683838
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,1,128,1,float16,fp8,0,0.27831466992696124
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,4,128,1,float16,float16,0,0.2800533374150594
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,2,128,1,float16,fp8,0,0.28084800640741986
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,2,128,1,fp8,fp8,0,0.21289066473642984
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,8192,4,2,128,1,float16,float16,0,0.2834826707839966
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,4,128,1,float16,fp8,0,0.15056000153223673
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,4,128,1,float16,float16,0,0.15250666936238608
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,4,128,1,fp8,fp8,0,0.11679466565450032
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,1,128,1,float16,float16,0,0.1534773310025533
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,1,128,1,fp8,fp8,0,0.11381333072980244
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,2,128,1,float16,float16,0,0.1543839971224467
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,2,128,1,float16,fp8,0,0.15362667044003805
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,1,128,1,float16,fp8,0,0.1534826656182607
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,8192,4,2,128,1,fp8,fp8,0,0.11534933249155681
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,4,1,128,1,float16,fp8,0,1.2860480149586995
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,4,1,128,1,fp8,fp8,0,0.9160266717274984
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,4,1,128,1,float16,float16,0,1.3006772994995117
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,4,128,1,float16,fp8,0,0.6644320090611776
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,4,2,128,1,float16,float16,0,1.2997653484344482
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,4,128,1,float16,float16,0,0.6795946756998698
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,4,2,128,1,float16,fp8,0,1.2913333574930828
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,6144,4,2,128,1,fp8,fp8,0,0.932639996210734
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,1,128,1,float16,float16,0,0.6796960035959879
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,1,128,1,float16,fp8,0,0.675706704457601
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,1,128,1,fp8,fp8,0,0.46159998575846356
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,4,128,1,fp8,fp8,0,0.47809600830078125
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,2,128,1,float16,float16,0,0.6855306625366211
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,2,128,1,fp8,fp8,0,0.47564268112182617
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,4,128,1,float16,float16,0,0.3279573321342468
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,4,128,1,float16,fp8,0,0.32577067613601685
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,6144,4,2,128,1,float16,fp8,0,0.6835359732309977
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,1,128,1,float16,float16,0,0.3257066607475281
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,4,128,1,fp8,fp8,0,0.2492319941520691
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,1,128,1,float16,fp8,0,0.32445865869522095
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,1,128,1,fp8,fp8,0,0.2469546596209208
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,2,128,1,float16,float16,0,0.3317226568857829
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,2,128,1,float16,fp8,0,0.32607465982437134
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,1,128,1,float16,float16,0,0.1859626571337382
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,6144,4,2,128,1,fp8,fp8,0,0.25066665808359784
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,4,128,1,float16,fp8,0,0.1819146672884623
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,4,128,1,fp8,fp8,0,0.14174399773279825
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,1,128,1,float16,fp8,0,0.18297600746154785
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,4,128,1,float16,float16,0,0.18491733074188232
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,1,128,1,fp8,fp8,0,0.13709333539009094
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,2,128,1,fp8,fp8,0,0.14270400007565817
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,2,128,1,float16,fp8,0,0.1868000030517578
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,4,128,1,float16,float16,0,0.1130400002002716
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,4,128,1,fp8,fp8,0,0.08715200424194336
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,4,128,1,float16,fp8,0,0.11380267143249512
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,6144,4,2,128,1,float16,float16,0,0.1872160037358602
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,1,128,1,float16,fp8,0,0.11572800079981486
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,1,128,1,fp8,fp8,0,0.08724799752235413
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,2,128,1,float16,float16,0,0.11780266960461934
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,2,128,1,float16,fp8,0,0.11603200435638428
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,2,128,1,fp8,fp8,0,0.08797333637873332
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,6144,4,1,128,1,float16,float16,0,0.116976002852122
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,4,1,128,1,float16,float16,0,1.2841866811116536
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,4,1,128,1,fp8,fp8,0,0.9349653720855713
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,4,1,128,1,float16,fp8,0,1.245685338973999
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,4,128,1,float16,float16,0,0.6581759850184122
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,4,2,128,1,float16,float16,0,1.2929386297861736
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,4,2,128,1,float16,fp8,0,1.2695679664611816
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,4,128,1,float16,fp8,0,0.6571466525395712
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,4096,4,2,128,1,fp8,fp8,0,0.9543733596801758
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,1,128,1,float16,float16,0,0.6628959973653158
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,1,128,1,fp8,fp8,0,0.4894346793492635
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,4,128,1,fp8,fp8,0,0.5027679999669393
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,1,128,1,float16,fp8,0,0.6472693284352621
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,4,128,1,float16,float16,0,0.35331201553344727
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,2,128,1,float16,float16,0,0.6642933289210001
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,2,128,1,float16,fp8,0,0.6575680176417033
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,1,128,1,float16,float16,0,0.3560853401819865
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,4096,4,2,128,1,fp8,fp8,0,0.5022506713867188
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,4,128,1,float16,fp8,0,0.35332798957824707
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,1,128,1,float16,fp8,0,0.3510613441467285
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,4,128,1,fp8,fp8,0,0.2579786578814189
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,1,128,1,fp8,fp8,0,0.24814399083455405
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,2,128,1,float16,float16,0,0.35868267218271893
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,2,128,1,float16,fp8,0,0.35715198516845703
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,4096,4,2,128,1,fp8,fp8,0,0.2537013292312622
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,4,128,1,float16,fp8,0,0.17190933227539062
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,1,128,1,float16,float16,0,0.16989866892496744
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,1,128,1,float16,fp8,0,0.16724799076716104
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,4,128,1,float16,float16,0,0.1734453241030375
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,1,128,1,fp8,fp8,0,0.13524267077445984
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,2,128,1,float16,float16,0,0.17263466119766235
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,4,128,1,fp8,fp8,0,0.13937600453694662
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,4,128,1,float16,float16,0,0.09341333309809367
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,2,128,1,float16,fp8,0,0.1707786719004313
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,4096,4,2,128,1,fp8,fp8,0,0.13793067137400308
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,4,128,1,float16,fp8,0,0.09335999687512715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,1,128,1,float16,float16,0,0.09289600451787312
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,1,128,1,float16,fp8,0,0.09227733810742696
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,2,128,1,float16,float16,0,0.09341866771380107
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,4,128,1,fp8,fp8,0,0.0794293334086736
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,1,128,1,fp8,fp8,0,0.0732586681842804
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,2,128,1,fp8,fp8,0,0.07524799803892772
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,4096,4,2,128,1,float16,fp8,0,0.09282666444778442
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,4,128,1,float16,float16,0,0.07932800054550171
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,4,128,1,float16,fp8,0,0.07926933467388153
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,4,128,1,fp8,fp8,0,0.06226666768391927
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,1,128,1,float16,float16,0,0.08177066842714946
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,2,128,1,float16,float16,0,0.08208000163237254
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,2,128,1,float16,fp8,0,0.08153066535790761
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,1,128,1,fp8,fp8,0,0.06233599781990051
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,2,128,1,fp8,fp8,0,0.06307200094064076
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,4096,4,1,128,1,float16,fp8,0,0.08124800026416779
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,4,1,128,1,float16,float16,0,0.7955573399861654
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,4,1,128,1,float16,fp8,0,0.7859679857889811
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,4,1,128,1,fp8,fp8,0,0.6055946747461954
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,4,128,1,float16,float16,0,0.41732800006866455
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,4,2,128,1,float16,fp8,0,0.7948160171508789
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,4,2,128,1,float16,float16,0,0.8071359793345133
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,4,128,1,float16,fp8,0,0.41997333367665607
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,1,128,1,float16,float16,0,0.41934935251871747
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,1,128,1,float16,fp8,0,0.4113706747690837
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,3072,4,2,128,1,fp8,fp8,0,0.621616005897522
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,4,128,1,fp8,fp8,0,0.3314239978790283
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,1,128,1,fp8,fp8,0,0.3075466752052307
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,2,128,1,float16,float16,0,0.4226293166478475
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,2,128,1,float16,fp8,0,0.4167146682739258
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,4,128,1,float16,float16,0,0.21272534132003784
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,4,128,1,float16,fp8,0,0.20996799071629843
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,1,128,1,float16,float16,0,0.20330133040746054
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,4,128,1,fp8,fp8,0,0.17165333032608032
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,1,128,1,float16,fp8,0,0.20156800746917725
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,1,128,1,fp8,fp8,0,0.16455466548601785
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,3072,4,2,128,1,fp8,fp8,0,0.3197279969851176
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,2,128,1,float16,float16,0,0.2094879945119222
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,2,128,1,float16,fp8,0,0.20785599946975708
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,4,128,1,float16,float16,0,0.11545600493748982
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,3072,4,2,128,1,fp8,fp8,0,0.16779732704162598
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,4,128,1,float16,fp8,0,0.11538666486740112
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,4,128,1,fp8,fp8,0,0.09735999504725139
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,1,128,1,float16,float16,0,0.1129813293615977
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,1,128,1,float16,fp8,0,0.11282133062680562
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,1,128,1,fp8,fp8,0,0.08938666184743245
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,2,128,1,float16,float16,0,0.11559466520945232
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,2,128,1,float16,fp8,0,0.11457066734631856
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,3072,4,2,128,1,fp8,fp8,0,0.094458669424057
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,4,128,1,float16,float16,0,0.0685280015071233
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,4,128,1,float16,fp8,0,0.06921066840489705
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,4,128,1,fp8,fp8,0,0.057861333092053734
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,1,128,1,float16,fp8,0,0.06914666791756947
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,1,128,1,float16,float16,0,0.07002133131027222
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,1,128,1,fp8,fp8,0,0.05611200133959452
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,2,128,1,float16,float16,0,0.069733331600825
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,2,128,1,float16,fp8,0,0.06949333349863689
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,3072,4,2,128,1,fp8,fp8,0,0.056703999638557434
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,4,128,1,float16,float16,0,0.0628959983587265
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,4,128,1,float16,fp8,0,0.06268266836802165
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,1,128,1,float16,fp8,0,0.06410133341948192
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,4,128,1,fp8,fp8,0,0.05002133548259735
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,1,128,1,fp8,fp8,0,0.04993600149949392
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,1,128,1,float16,float16,0,0.06488533318042755
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,2,128,1,float16,float16,0,0.06489066779613495
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,2,128,1,fp8,fp8,0,0.0505920002857844
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,3072,4,2,128,1,float16,fp8,0,0.06448533137639363
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,4,1,128,1,float16,float16,0,0.8729173342386881
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,4,1,128,1,fp8,fp8,0,0.6967840194702148
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,4,1,128,1,float16,fp8,0,0.8191786607106527
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,4,128,1,float16,fp8,0,0.4350666602452596
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,4,128,1,float16,float16,0,0.45741868019104004
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,4,2,128,1,fp8,fp8,0,0.7137653032938639
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,4,2,128,1,float16,float16,0,0.882800022761027
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,2048,4,2,128,1,float16,fp8,0,0.831168015797933
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,1,128,1,float16,float16,0,0.4509333372116089
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,1,128,1,float16,fp8,0,0.42844800154368085
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,1,128,1,fp8,fp8,0,0.3596160014470418
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,4,128,1,fp8,fp8,0,0.3808533350626628
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,2,128,1,float16,float16,0,0.45419732729593915
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,2,128,1,float16,fp8,0,0.4322293202082316
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,2048,4,2,128,1,fp8,fp8,0,0.36907732486724854
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,4,128,1,float16,float16,0,0.24456000328063965
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,1,128,1,float16,float16,0,0.23667200406392416
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,4,128,1,float16,fp8,0,0.23660266399383545
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,4,128,1,fp8,fp8,0,0.19644266366958618
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,1,128,1,float16,fp8,0,0.22846933205922446
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,1,128,1,fp8,fp8,0,0.18150399128595987
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,2,128,1,float16,float16,0,0.24023999770482382
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,2,128,1,float16,fp8,0,0.23125867048899332
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,4,128,1,float16,fp8,0,0.11872532963752747
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,4,128,1,float16,float16,0,0.1202346682548523
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,2048,4,2,128,1,fp8,fp8,0,0.1879146695137024
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,4,128,1,fp8,fp8,0,0.10629866520563762
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,1,128,1,float16,float16,0,0.1153706709543864
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,1,128,1,float16,fp8,0,0.11261333028475444
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,1,128,1,fp8,fp8,0,0.09888000289599101
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,2,128,1,float16,float16,0,0.11742400129636128
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,2,128,1,float16,fp8,0,0.11582932869593303
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,2048,4,2,128,1,fp8,fp8,0,0.1014400025208791
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,4,128,1,float16,fp8,0,0.06550933420658112
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,4,128,1,float16,float16,0,0.06572799881299336
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,4,128,1,fp8,fp8,0,0.06044266621271769
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,1,128,1,float16,float16,0,0.06307733555634816
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,1,128,1,float16,fp8,0,0.06189866860707601
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,1,128,1,fp8,fp8,0,0.053317333261171974
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,2,128,1,float16,float16,0,0.06388799846172333
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,2,128,1,float16,fp8,0,0.06273066500822704
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,2048,4,2,128,1,fp8,fp8,0,0.05494933327039083
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,1,128,1,float16,float16,0,0.051327998439470925
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,4,128,1,float16,fp8,0,0.04981866478919983
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,4,128,1,fp8,fp8,0,0.04262933135032654
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,4,128,1,float16,float16,0,0.050330668687820435
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,1,128,1,float16,fp8,0,0.05048533280690511
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,1,128,1,fp8,fp8,0,0.041759997606277466
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,2,128,1,float16,fp8,0,0.05083199838797251
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,2,128,1,float16,float16,0,0.051413332422574363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,2048,4,2,128,1,fp8,fp8,0,0.04238933324813843
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,4,128,1,float16,float16,0,0.04643199841181437
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,1,128,1,float16,float16,0,0.04754666487375895
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,4,128,1,float16,fp8,0,0.04601066807905833
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,4,128,1,fp8,fp8,0,0.038058665891488395
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,1,128,1,float16,fp8,0,0.047269334395726524
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,1,128,1,fp8,fp8,0,0.03788800040880839
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,2,128,1,float16,fp8,0,0.04746133089065552
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,2,128,1,fp8,fp8,0,0.03818666686614355
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,2048,4,2,128,1,float16,float16,0,0.04763199885686239
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,4,1,128,1,float16,float16,0,0.5784106651941935
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,4,1,128,1,fp8,fp8,0,0.4859306812286377
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,4,128,1,float16,float16,0,0.31378666559855145
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,4,128,1,float16,fp8,0,0.2927253246307373
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,4,1,128,1,float16,fp8,0,0.5260159969329834
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,4,2,128,1,float16,float16,0,0.5935680071512858
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,4,2,128,1,float16,fp8,0,0.5404906670252482
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1536,4,2,128,1,fp8,fp8,0,0.49073068300882977
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,4,128,1,fp8,fp8,0,0.2659733295440674
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,1,128,1,float16,fp8,0,0.28038932879765827
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,1,128,1,float16,float16,0,0.30429333448410034
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,1,128,1,fp8,fp8,0,0.24090667565663657
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,4,128,1,float16,float16,0,0.16035200158754984
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,2,128,1,float16,fp8,0,0.28588799635569256
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,2,128,1,fp8,fp8,0,0.25217066208521527
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1536,4,2,128,1,float16,float16,0,0.30853333075841266
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,1,128,1,float16,fp8,0,0.14324266711870828
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,1,128,1,float16,float16,0,0.14866666992505392
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,4,128,1,float16,fp8,0,0.15407466888427734
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,1,128,1,fp8,fp8,0,0.12905066212018332
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,4,128,1,fp8,fp8,0,0.13802133003870645
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,2,128,1,float16,float16,0,0.15370666980743408
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,2,128,1,float16,fp8,0,0.14755733807881674
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,1,128,1,float16,float16,0,0.0780320018529892
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,1,128,1,float16,fp8,0,0.0778186668952306
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1536,4,2,128,1,fp8,fp8,0,0.13307199875513712
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,4,128,1,float16,float16,0,0.08452266454696655
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,1,128,1,fp8,fp8,0,0.06876266499360402
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,4,128,1,float16,fp8,0,0.08385066191355388
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,2,128,1,float16,float16,0,0.08208000163237254
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,4,128,1,fp8,fp8,0,0.07821866869926453
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,1,128,1,float16,fp8,0,0.04643199841181437
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,2,128,1,float16,fp8,0,0.0801386684179306
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1536,4,2,128,1,fp8,fp8,0,0.07348800202210744
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,4,128,1,float16,fp8,0,0.04758933186531067
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,4,128,1,fp8,fp8,0,0.04423999786376953
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,1,128,1,float16,float16,0,0.047728002071380615
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,4,128,1,float16,float16,0,0.04762133459250132
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,2,128,1,float16,float16,0,0.04760533571243286
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,2,128,1,fp8,fp8,0,0.04144000013669332
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,2,128,1,float16,fp8,0,0.04678933322429657
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1536,4,1,128,1,fp8,fp8,0,0.04046933352947235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,4,128,1,float16,fp8,0,0.041189332803090416
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,4,128,1,float16,float16,0,0.041637333730856575
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,1,128,1,float16,fp8,0,0.04141866664091746
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,4,128,1,fp8,fp8,0,0.035349334279696144
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,1,128,1,fp8,fp8,0,0.03462400039037069
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,2,128,1,float16,float16,0,0.04223466912905375
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,1,128,1,float16,float16,0,0.042170668641726174
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,2,128,1,fp8,fp8,0,0.035317334036032356
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,4,128,1,float16,fp8,0,0.03812266637881597
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1536,4,2,128,1,float16,fp8,0,0.04171733558177948
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,1,128,1,float16,float16,0,0.039359999199708305
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,1,128,1,float16,fp8,0,0.038975998759269714
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,4,128,1,float16,float16,0,0.038202665746212006
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,4,128,1,fp8,fp8,0,0.03178666780392329
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,2,128,1,float16,float16,0,0.039279999832312264
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,2,128,1,float16,fp8,0,0.03916800022125244
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,1,128,1,fp8,fp8,0,0.031925333042939506
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1536,4,2,128,1,fp8,fp8,0,0.03209066639343897
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,4,1,128,1,float16,float16,0,0.5287573337554932
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,4,1,128,1,float16,fp8,0,0.5228533347447714
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,4,2,128,1,float16,fp8,0,0.5333066781361898
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,4,2,128,1,fp8,fp8,0,0.4679786761601766
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,4,1,128,1,fp8,fp8,0,0.43753600120544434
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,4,128,1,float16,float16,0,0.2943039933840434
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,1024,4,2,128,1,float16,float16,0,0.5393120050430298
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,4,128,1,float16,fp8,0,0.28728532791137695
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,1,128,1,float16,float16,0,0.2747573256492615
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,1,128,1,float16,fp8,0,0.2741760015487671
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,1,128,1,fp8,fp8,0,0.22906132539113364
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,4,128,1,fp8,fp8,0,0.2621226708094279
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,2,128,1,fp8,fp8,0,0.2427306572596232
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,4,128,1,float16,fp8,0,0.15681599577267966
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,2,128,1,float16,fp8,0,0.2771679957707723
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,1024,4,2,128,1,float16,float16,0,0.281546672185262
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,1,128,1,float16,float16,0,0.15042133132616678
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,1,128,1,float16,fp8,0,0.14893333117167154
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,4,128,1,float16,float16,0,0.15986667076746622
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,4,128,1,fp8,fp8,0,0.1307146648565928
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,2,128,1,float16,float16,0,0.1528320014476776
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,1,128,1,fp8,fp8,0,0.11610666910807292
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,2,128,1,float16,fp8,0,0.15214932958285013
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,1024,4,2,128,1,fp8,fp8,0,0.12135466933250427
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,4,128,1,float16,fp8,0,0.08145066599051158
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,1,128,1,float16,float16,0,0.07603199779987335
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,4,128,1,float16,float16,0,0.08507733543713887
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,4,128,1,fp8,fp8,0,0.07239466905593872
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,1,128,1,float16,fp8,0,0.07561066746711731
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,1,128,1,fp8,fp8,0,0.06486399968465169
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,2,128,1,float16,float16,0,0.07826666533946991
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,4,128,1,float16,fp8,0,0.0462666650613149
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,4,128,1,float16,float16,0,0.04654933512210846
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,2,128,1,fp8,fp8,0,0.06779733300209045
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,1024,4,2,128,1,float16,fp8,0,0.07795733213424683
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,1,128,1,float16,float16,0,0.04274666806062063
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,1,128,1,float16,fp8,0,0.04292266567548116
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,2,128,1,float16,float16,0,0.043605332573254905
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,4,128,1,fp8,fp8,0,0.04201066493988037
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,1,128,1,fp8,fp8,0,0.03694933404525121
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,2,128,1,float16,fp8,0,0.04332800209522247
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,1024,4,2,128,1,fp8,fp8,0,0.038592000802357994
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,4,128,1,float16,fp8,0,0.03260799994071325
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,4,128,1,float16,float16,0,0.03321066747109095
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,1,128,1,float16,float16,0,0.032101333141326904
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,2,128,1,float16,fp8,0,0.03234666585922241
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,2,128,1,float16,float16,0,0.03294933338960012
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,4,128,1,fp8,fp8,0,0.02938133229811986
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,1,128,1,float16,fp8,0,0.03222399950027466
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,1,128,1,fp8,fp8,0,0.027466667195161183
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,4,128,1,float16,fp8,0,0.029306667546431225
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,4,128,1,float16,float16,0,0.02945599953333537
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,1024,4,2,128,1,fp8,fp8,0,0.028042666614055634
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,4,128,1,fp8,fp8,0,0.024906667570273083
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,1,128,1,float16,float16,0,0.02922133356332779
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,1,128,1,float16,fp8,0,0.029498666524887085
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,1,128,1,fp8,fp8,0,0.024613333245118458
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,2,128,1,float16,fp8,0,0.029792000850041706
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,2,128,1,float16,float16,0,0.029487999776999157
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,4,128,1,float16,float16,0,0.02756800005833308
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,4,128,1,float16,fp8,0,0.027589333554108936
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,1024,4,2,128,1,fp8,fp8,0,0.025226667523384094
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,4,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,1,128,1,float16,float16,0,0.028079998989899952
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,1,128,1,float16,fp8,0,0.028522667785485584
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,2,128,1,fp8,fp8,0,0.023711999257405598
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,2,128,1,float16,fp8,0,0.028725333511829376
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,2,128,1,float16,float16,0,0.02845333268245061
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,1024,4,1,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,4,1,128,1,float16,float16,0,0.41923733552296955
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,4,1,128,1,fp8,fp8,0,0.3699733416239421
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,4,1,128,1,float16,fp8,0,0.41595733165740967
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,4,128,1,float16,fp8,0,0.23818665742874146
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,4,128,1,float16,float16,0,0.24238399664560953
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,4,2,128,1,float16,fp8,0,0.42715199788411456
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,4,2,128,1,fp8,fp8,0,0.3981920083363851
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,512,4,2,128,1,float16,float16,0,0.4303733507792155
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,1,128,1,float16,fp8,0,0.21712533632914224
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,1,128,1,float16,float16,0,0.21827733516693115
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,1,128,1,fp8,fp8,0,0.1922453244527181
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,2,128,1,float16,float16,0,0.22614399592081705
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,2,128,1,float16,fp8,0,0.22233599424362183
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,2,128,1,fp8,fp8,0,0.20441599686940512
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,4,128,1,float16,float16,0,0.1316266655921936
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,4,128,1,float16,fp8,0,0.12909332911173502
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,512,4,4,128,1,fp8,fp8,0,0.22326399882634482
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,1,128,1,float16,float16,0,0.11947733163833618
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,4,128,1,fp8,fp8,0,0.11322666207949321
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,1,128,1,fp8,fp8,0,0.10001599788665771
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,1,128,1,float16,fp8,0,0.1186346709728241
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,4,128,1,float16,float16,0,0.0726453314224879
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,2,128,1,float16,float16,0,0.12287466724713643
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,2,128,1,fp8,fp8,0,0.10566400488217671
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,4,128,1,float16,fp8,0,0.07056533296902974
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,512,4,2,128,1,float16,fp8,0,0.12062399586041768
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,4,128,1,fp8,fp8,0,0.06498133142789204
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,1,128,1,float16,float16,0,0.06262933214505513
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,1,128,1,float16,fp8,0,0.06275733311971028
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,1,128,1,fp8,fp8,0,0.056874667604764305
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,2,128,1,fp8,fp8,0,0.059717332323392235
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,2,128,1,float16,float16,0,0.06620266536871593
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,4,128,1,float16,fp8,0,0.039477333426475525
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,512,4,2,128,1,float16,fp8,0,0.06534400085608165
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,4,128,1,float16,float16,0,0.039877332746982574
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,4,128,1,fp8,fp8,0,0.03783999880154928
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,1,128,1,float16,float16,0,0.03658666710058848
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,1,128,1,float16,fp8,0,0.036373332142829895
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,1,128,1,fp8,fp8,0,0.03332799921433131
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,2,128,1,float16,float16,0,0.03728000074625015
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,2,128,1,float16,fp8,0,0.03770666569471359
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,512,4,2,128,1,fp8,fp8,0,0.034634667138258614
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,4,128,1,float16,float16,0,0.026677332818508148
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,4,128,1,float16,fp8,0,0.026799999177455902
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,1,128,1,float16,fp8,0,0.025888000925381977
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,1,128,1,float16,float16,0,0.02532800038655599
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,1,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,2,128,1,float16,float16,0,0.026000000536441803
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,2,128,1,float16,fp8,0,0.02601066728432973
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,2,128,1,fp8,fp8,0,0.02401600033044815
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,512,4,4,128,1,fp8,fp8,0,0.02479466547568639
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,4,128,1,float16,float16,0,0.022986667851607006
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,4,128,1,float16,fp8,0,0.02327466756105423
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,4,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,1,128,1,float16,float16,0,0.022554665803909302
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,1,128,1,fp8,fp8,0,0.02059200033545494
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,1,128,1,float16,fp8,0,0.0227360005180041
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,2,128,1,float16,float16,0,0.022543999056021374
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,4,128,1,float16,float16,0,0.021040000021457672
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,2,128,1,float16,fp8,0,0.02272533377011617
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,512,4,2,128,1,fp8,fp8,0,0.0207893339296182
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,4,128,1,float16,fp8,0,0.02195200075705846
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,4,128,1,fp8,fp8,0,0.019445333629846573
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,1,128,1,float16,float16,0,0.021456000705560047
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,1,128,1,float16,fp8,0,0.02160533269246419
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,2,128,1,float16,float16,0,0.021744000415007275
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,2,128,1,fp8,fp8,0,0.019343999524911244
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,1,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,512,4,2,128,1,float16,fp8,0,0.0220266655087471
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,4,128,1,float16,float16,0,0.02035733312368393
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,4,128,1,float16,fp8,0,0.020762667059898376
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,4,128,1,fp8,fp8,0,0.018464000274737675
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,1,128,1,float16,fp8,0,0.02170666555563609
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,1,128,1,float16,float16,0,0.020874666670958202
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,1,128,1,fp8,fp8,0,0.018922666708628338
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,2,128,1,float16,fp8,0,0.02164799968401591
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,2,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,512,4,2,128,1,float16,float16,0,0.020981334149837494
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,4,1,128,1,float16,fp8,0,0.18393067518870035
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,4,1,128,1,float16,float16,0,0.1848533352216085
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,4,1,128,1,fp8,fp8,0,0.16237866878509521
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,4,2,128,1,float16,float16,0,0.1934559941291809
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,4,2,128,1,float16,fp8,0,0.19084266821543375
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,4,128,1,float16,float16,0,0.12100266416867574
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,256,4,2,128,1,fp8,fp8,0,0.180074671904246
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,1,128,1,float16,float16,0,0.10058133800824483
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,1,128,1,float16,fp8,0,0.10143466790517171
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,4,128,1,float16,fp8,0,0.1165120005607605
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,4,128,1,fp8,fp8,0,0.1069599986076355
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,1,128,1,fp8,fp8,0,0.08987200260162354
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,2,128,1,float16,float16,0,0.10611733794212341
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,2,128,1,float16,fp8,0,0.10522133111953735
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,4,128,1,float16,fp8,0,0.0678719977537791
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,4,128,1,float16,float16,0,0.06885333359241486
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,4,128,1,fp8,fp8,0,0.05952000121275584
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,256,4,2,128,1,fp8,fp8,0,0.09454933802286784
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,1,128,1,float16,float16,0,0.05618133147557577
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,1,128,1,float16,fp8,0,0.056789333621660866
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,1,128,1,fp8,fp8,0,0.05068266888459524
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,2,128,1,float16,float16,0,0.05948266883691152
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,2,128,1,float16,fp8,0,0.05889600018660227
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,1,128,1,float16,float16,0,0.033957332372665405
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,4,128,1,float16,float16,0,0.03728533287843069
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,256,4,2,128,1,fp8,fp8,0,0.054192001620928444
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,4,128,1,fp8,fp8,0,0.035887998839219414
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,1,128,1,float16,fp8,0,0.03386666625738144
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,4,128,1,float16,fp8,0,0.03716266651948293
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,1,128,1,fp8,fp8,0,0.030917334059874218
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,2,128,1,float16,float16,0,0.03509333233038584
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,2,128,1,float16,fp8,0,0.03475199888149897
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,4,128,1,float16,float16,0,0.024735999604066212
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,4,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,256,4,2,128,1,fp8,fp8,0,0.03206400076548258
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,1,128,1,float16,float16,0,0.0229066660006841
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,4,128,1,float16,fp8,0,0.024559999505678814
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,1,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,1,128,1,fp8,fp8,0,0.0215786670645078
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,2,128,1,float16,float16,0,0.02367999901374181
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,2,128,1,float16,fp8,0,0.02365333338578542
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,4,128,1,float16,float16,0,0.020213333268960316
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,4,128,1,float16,fp8,0,0.020394666741291683
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,256,4,2,128,1,fp8,fp8,0,0.022069332500298817
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,1,128,1,float16,float16,0,0.01937599976857503
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,4,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,1,128,1,float16,fp8,0,0.019653332730134327
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,1,128,1,fp8,fp8,0,0.018533332894245785
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,1,128,1,float16,float16,0,0.018239999810854595
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,2,128,1,float16,fp8,0,0.01945066700379054
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,4,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,2,128,1,fp8,fp8,0,0.018751999984184902
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,256,4,2,128,1,float16,float16,0,0.019626667102177937
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,4,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,4,128,1,float16,float16,0,0.018485333770513535
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,1,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,1,128,1,float16,fp8,0,0.01828266680240631
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,2,128,1,float16,float16,0,0.018570666511853535
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,2,128,1,float16,fp8,0,0.018485333770513535
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,256,4,2,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,4,128,1,float16,float16,0,0.01728533332546552
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,4,128,1,float16,fp8,0,0.017935999979575474
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,4,128,1,fp8,fp8,0,0.016517333686351776
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,1,128,1,float16,float16,0,0.017797333498795826
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,1,128,1,float16,fp8,0,0.017952000101407368
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,2,128,1,float16,float16,0,0.01758933315674464
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,1,128,1,fp8,fp8,0,0.01669866715868314
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,2,128,1,float16,fp8,0,0.018197332819302876
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,256,4,2,128,1,fp8,fp8,0,0.016810666769742966
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,4,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,4,128,1,float16,float16,0,0.016714667280515034
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,4,128,1,fp8,fp8,0,0.016063999384641647
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,1,128,1,float16,fp8,0,0.017765333255132038
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,2,128,1,float16,float16,0,0.017103999853134155
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,2,128,1,float16,fp8,0,0.01762666677435239
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,2,128,1,fp8,fp8,0,0.016186666985352833
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,1,128,1,float16,float16,0,0.017093333105246227
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,256,4,1,128,1,fp8,fp8,0,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,4,1,128,1,float16,float16,0,0.09726400176684062
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,4,1,128,1,float16,fp8,0,0.09673600395520528
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,4,1,128,1,fp8,fp8,0,0.08892266949017842
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,4,128,1,float16,float16,0,0.06659199794133504
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,4,2,128,1,float16,float16,0,0.10293866197268169
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,4,2,128,1,float16,fp8,0,0.10033599535624187
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,4,128,1,float16,fp8,0,0.06302933394908905
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,128,4,2,128,1,fp8,fp8,0,0.0956213374932607
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,4,128,1,fp8,fp8,0,0.05894933144251505
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,1,128,1,float16,float16,0,0.05342933535575867
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,1,128,1,float16,fp8,0,0.05421866476535797
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,1,128,1,fp8,fp8,0,0.05120000243186951
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,2,128,1,float16,fp8,0,0.05566933254400889
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,2,128,1,float16,float16,0,0.0565226674079895
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,4,128,1,float16,float16,0,0.03570666660865148
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,128,4,2,128,1,fp8,fp8,0,0.053946668903032936
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,4,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,4,128,1,fp8,fp8,0,0.03499733408292135
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,1,128,1,float16,float16,0,0.0320266659061114
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,1,128,1,fp8,fp8,0,0.030773334205150604
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,1,128,1,float16,fp8,0,0.03238933285077413
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,2,128,1,fp8,fp8,0,0.031680000325044
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,4,128,1,float16,fp8,0,0.0239680012067159
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,4,128,1,float16,float16,0,0.023946667710940044
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,2,128,1,float16,float16,0,0.03270400067170461
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,128,4,2,128,1,float16,fp8,0,0.032613334556420646
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,4,128,1,fp8,fp8,0,0.022757334013779957
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,1,128,1,float16,float16,0,0.02231466770172119
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,2,128,1,float16,float16,0,0.02269333352645238
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,1,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,1,128,1,float16,fp8,0,0.022533332308133442
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,2,128,1,float16,fp8,0,0.023034666975339253
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,128,4,2,128,1,fp8,fp8,0,0.022085333863894146
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,4,128,1,float16,float16,0,0.018730666488409042
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,4,128,1,float16,fp8,0,0.018794666975736618
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,1,128,1,float16,float16,0,0.018191999445358913
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,4,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,1,128,1,fp8,fp8,0,0.018506667266289394
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,2,128,1,float16,float16,0,0.018624000251293182
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,2,128,1,float16,fp8,0,0.018506667266289394
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,1,128,1,float16,fp8,0,0.018320000420014065
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,4,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,128,4,2,128,1,fp8,fp8,0,0.018687999496857326
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,4,128,1,float16,float16,0,0.017024000485738117
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,4,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,1,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,1,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,2,128,1,float16,fp8,0,0.017530667285124462
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,4,128,1,float16,float16,0,0.01613866661985715
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,2,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,4,128,1,float16,fp8,0,0.016469333320856094
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,128,4,2,128,1,float16,float16,0,0.01682666689157486
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,4,128,1,fp8,fp8,0,0.016496000190575916
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,1,128,1,float16,float16,0,0.016421332955360413
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,1,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,1,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,2,128,1,float16,float16,0,0.01623999948302905
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,2,128,1,float16,fp8,0,0.01666133354107539
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,128,4,2,128,1,fp8,fp8,0,0.016757333030303318
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,4,128,1,float16,float16,0,0.015557333827018738
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,4,128,1,float16,fp8,0,0.01578666642308235
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,1,128,1,float16,fp8,0,0.01613333324591319
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,1,128,1,float16,float16,0,0.015882667154073715
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,4,128,1,fp8,fp8,0,0.016021333634853363
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,1,128,1,fp8,fp8,0,0.016330666840076447
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,2,128,1,float16,float16,0,0.01607999950647354
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,2,128,1,float16,fp8,0,0.016336000214020412
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,128,4,2,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,4,128,1,float16,fp8,0,0.01573866605758667
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,4,128,1,fp8,fp8,0,0.01589866727590561
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,4,128,1,float16,float16,0,0.015365333606799444
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,1,128,1,float16,fp8,0,0.01651200031240781
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,1,128,1,float16,float16,0,0.01578666642308235
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,1,128,1,fp8,fp8,0,0.016016000260909397
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,2,128,1,float16,float16,0,0.015850666910409927
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,2,128,1,fp8,fp8,0,0.01628799984852473
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,128,4,2,128,1,float16,fp8,0,0.016202667107184727
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,4,1,128,1,float16,float16,0,0.060864001512527466
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,4,2,128,1,float16,float16,0,0.0647680014371872
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,4,2,128,1,float16,fp8,0,0.06345599889755249
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,4,2,128,1,fp8,fp8,0,0.06912533442179362
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,4,1,128,1,float16,fp8,0,0.06127466758092245
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,64,4,1,128,1,fp8,fp8,0,0.06572266419728597
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,4,128,1,float16,float16,0,0.03667200108369192
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,4,128,1,float16,fp8,0,0.035786665976047516
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,4,128,1,fp8,fp8,0,0.042768001556396484
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,1,128,1,float16,float16,0,0.03527999917666117
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,1,128,1,fp8,fp8,0,0.037621334195137024
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,2,128,1,float16,float16,0,0.036202666660149894
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,2,128,1,fp8,fp8,0,0.039274667700131737
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,4,128,1,float16,float16,0,0.023989332218964893
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,4,128,1,float16,fp8,0,0.02420266717672348
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,1,128,1,float16,float16,0,0.023525332411130268
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,64,4,2,128,1,float16,fp8,0,0.03586133321126302
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,4,128,1,fp8,fp8,0,0.026426665484905243
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,1,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,2,128,1,float16,float16,0,0.023605334262053173
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,4,128,1,float16,float16,0,0.018186666071414948
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,2,128,1,fp8,fp8,0,0.025562666356563568
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,2,128,1,float16,fp8,0,0.023754666248957317
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,64,4,1,128,1,fp8,fp8,0,0.02515733242034912
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,4,128,1,float16,fp8,0,0.018186666071414948
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,1,128,1,float16,float16,0,0.017349333812793095
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,4,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,1,128,1,float16,fp8,0,0.017605333278576534
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,2,128,1,float16,float16,0,0.017674667139848072
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,1,128,1,fp8,fp8,0,0.018239999810854595
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,2,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,64,4,2,128,1,float16,fp8,0,0.017738666385412216
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,4,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,4,128,1,float16,fp8,0,0.01621866722901662
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,4,128,1,float16,float16,0,0.015850666910409927
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,1,128,1,float16,float16,0,0.015893333901961643
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,1,128,1,float16,fp8,0,0.016293333222468693
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,1,128,1,fp8,fp8,0,0.016837333639462788
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,2,128,1,float16,float16,0,0.01591466615597407
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,2,128,1,float16,fp8,0,0.0162773331006368
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,64,4,2,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,4,128,1,fp8,fp8,0,0.016309333344300587
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,4,128,1,float16,float16,0,0.015029333531856537
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,4,128,1,float16,fp8,0,0.015461333096027374
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,1,128,1,float16,float16,0,0.015157333264748255
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,1,128,1,float16,fp8,0,0.015743999431530636
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,2,128,1,float16,fp8,0,0.015749332805474598
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,2,128,1,fp8,fp8,0,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,4,128,1,float16,float16,0,0.014522666732470194
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,4,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,1,128,1,fp8,fp8,0,0.016437333077192307
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,64,4,2,128,1,float16,float16,0,0.015013333410024643
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,4,128,1,fp8,fp8,0,0.015749332805474598
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,1,128,1,float16,float16,0,0.014975999792416891
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,1,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,1,128,1,fp8,fp8,0,0.016074666132529575
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,2,128,1,float16,float16,0,0.014789332946141561
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,2,128,1,float16,fp8,0,0.015466666469971338
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,4,128,1,float16,float16,0,0.014106666048367819
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,64,4,2,128,1,fp8,fp8,0,0.01587733378012975
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,4,128,1,float16,fp8,0,0.014592000593741735
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,4,128,1,fp8,fp8,0,0.015520000209410986
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,1,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,1,128,1,fp8,fp8,0,0.015765332927306492
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,1,128,1,float16,float16,0,0.01463466634353002
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,2,128,1,float16,float16,0,0.014725333700577417
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,2,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,4,128,1,float16,fp8,0,0.014501333236694336
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,64,4,2,128,1,fp8,fp8,0,0.01569066693385442
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,4,128,1,float16,float16,0,0.013936000565687815
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,4,128,1,fp8,fp8,0,0.015487999965747198
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,1,128,1,float16,float16,0,0.014629332969586054
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,1,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,2,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,1,128,1,fp8,fp8,0,0.015749332805474598
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,2,128,1,fp8,fp8,0,0.015717333803574245
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,4,1,128,1,float16,fp8,0,0.04442666471004486
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,4,1,128,1,float16,float16,0,0.04381866753101349
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,64,4,2,128,1,float16,float16,0,0.014325333138306936
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,4,1,128,1,fp8,fp8,0,0.052154665191968284
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,4,2,128,1,float16,float16,0,0.04480533301830292
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,4,2,128,1,float16,fp8,0,0.044794668753941856
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,32,4,2,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,4,128,1,float16,fp8,0,0.02740799884001414
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,4,128,1,fp8,fp8,0,0.03324266771475474
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,1,128,1,fp8,fp8,0,0.0322080006202062
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,1,128,1,float16,fp8,0,0.027456000447273254
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,4,128,1,float16,float16,0,0.02756800005833308
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,2,128,1,float16,float16,0,0.027866666515668232
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,2,128,1,float16,fp8,0,0.02787200113137563
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,1,128,1,float16,float16,0,0.027349332968393963
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,4,128,1,float16,float16,0,0.019802667200565338
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,32,4,2,128,1,fp8,fp8,0,0.03272533416748047
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,4,128,1,float16,fp8,0,0.019786667078733444
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,4,128,1,fp8,fp8,0,0.022592000663280487
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,1,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,1,128,1,fp8,fp8,0,0.021925332645575207
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,1,128,1,float16,float16,0,0.019306667149066925
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,2,128,1,fp8,fp8,0,0.022458667556444805
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,2,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,4,128,1,float16,float16,0,0.015850666910409927
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,4,128,1,float16,fp8,0,0.016143999993801117
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,32,4,2,128,1,float16,float16,0,0.01921066641807556
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,4,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,1,128,1,float16,float16,0,0.015210667004187902
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,1,128,1,float16,fp8,0,0.015647999942302704
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,2,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,2,128,1,fp8,fp8,0,0.017551999539136887
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,2,128,1,float16,float16,0,0.01533866673707962
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,32,4,1,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,4,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,4,128,1,fp8,fp8,0,0.01651200031240781
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,4,128,1,float16,float16,0,0.014618666221698126
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,1,128,1,fp8,fp8,0,0.016575999557971954
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,1,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,2,128,1,fp8,fp8,0,0.016549333930015564
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,2,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,2,128,1,float16,float16,0,0.014597332725922266
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,32,4,1,128,1,float16,float16,0,0.014544000228246054
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,4,128,1,float16,float16,0,0.01392000044385592
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,4,128,1,fp8,fp8,0,0.015893333901961643
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,1,128,1,float16,fp8,0,0.014576000471909841
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,1,128,1,float16,float16,0,0.014266667266686758
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,1,128,1,fp8,fp8,0,0.016234666109085083
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,2,128,1,float16,float16,0,0.01421333352724711
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,2,128,1,float16,fp8,0,0.01470400020480156
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,4,128,1,float16,fp8,0,0.014271999398867289
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,32,4,2,128,1,fp8,fp8,0,0.016208000481128693
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,4,128,1,float16,float16,0,0.013786666095256805
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,1,128,1,float16,float16,0,0.013914667069911957
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,4,128,1,float16,fp8,0,0.014053333550691605
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,4,128,1,fp8,fp8,0,0.01563199982047081
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,1,128,1,float16,fp8,0,0.014479999740918478
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,2,128,1,float16,fp8,0,0.014666666587193808
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,2,128,1,fp8,fp8,0,0.015978666643301647
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,1,128,1,fp8,fp8,0,0.01580799991885821
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,4,128,1,float16,float16,0,0.01310933381319046
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,4,128,1,fp8,fp8,0,0.015482666591803232
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,4,128,1,float16,fp8,0,0.013637332866589228
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,32,4,2,128,1,float16,float16,0,0.013786666095256805
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,1,128,1,float16,float16,0,0.013573333621025085
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,1,128,1,float16,fp8,0,0.014309333016475042
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,2,128,1,float16,fp8,0,0.013983999689420065
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,1,128,1,fp8,fp8,0,0.015706667055686314
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,2,128,1,fp8,fp8,0,0.01565333331624667
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,32,4,2,128,1,float16,float16,0,0.013637332866589228
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,4,128,1,fp8,fp8,0,0.01543466622630755
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,4,128,1,float16,float16,0,0.013125333935022354
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,1,128,1,float16,fp8,0,0.014240000396966934
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,4,128,1,float16,fp8,0,0.013327999661366144
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,1,128,1,float16,float16,0,0.01381333296497663
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,1,128,1,fp8,fp8,0,0.015546667079130808
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,2,128,1,float16,float16,0,0.013717333475748697
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,2,128,1,fp8,fp8,0,0.015706667055686314
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,32,4,2,128,1,float16,fp8,0,0.014368000129858652
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,4,1,128,1,float16,float16,0,0.03937600056330363
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,4,1,128,1,float16,fp8,0,0.03997333347797394
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,4,2,128,1,float16,fp8,0,0.04035199930270513
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,4,128,1,float16,float16,0,0.023973333338896435
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,4,2,128,1,float16,float16,0,0.04002666721741358
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,4,2,128,1,fp8,fp8,0,0.04703466594219208
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,4,128,1,fp8,fp8,0,0.029674666623274486
TRTLLM,0.20.0,NVIDIA H200,context_attention,256,16,4,1,128,1,fp8,fp8,0,0.04614933331807455
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,4,128,1,float16,fp8,0,0.024165332317352295
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,1,128,1,float16,float16,0,0.025466665625572205
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,1,128,1,float16,fp8,0,0.025237334271272022
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,1,128,1,fp8,fp8,0,0.029189333319664
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,2,128,1,float16,float16,0,0.02532266577084859
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,2,128,1,float16,fp8,0,0.025205334027608235
TRTLLM,0.20.0,NVIDIA H200,context_attention,128,16,4,2,128,1,fp8,fp8,0,0.02942933390537898
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,4,128,1,float16,float16,0,0.01788266624013583
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,1,128,1,float16,float16,0,0.017797333498795826
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,4,128,1,fp8,fp8,0,0.02089066555102666
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,1,128,1,float16,fp8,0,0.018133333573738735
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,2,128,1,float16,float16,0,0.01826133330663045
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,1,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,2,128,1,float16,fp8,0,0.01836799954374631
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,4,128,1,float16,fp8,0,0.01815466706951459
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,4,128,1,float16,float16,0,0.015157333264748255
TRTLLM,0.20.0,NVIDIA H200,context_attention,64,16,4,2,128,1,fp8,fp8,0,0.021018666525681812
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,4,128,1,float16,fp8,0,0.015482666591803232
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,4,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,1,128,1,float16,float16,0,0.014933332800865173
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,1,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,1,128,1,fp8,fp8,0,0.016783999900023144
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,2,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,2,128,1,float16,float16,0,0.014890667051076889
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,4,128,1,float16,float16,0,0.014165333161751429
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,4,128,1,fp8,fp8,0,0.01618133361140887
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,4,128,1,float16,fp8,0,0.014522666732470194
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,1,128,1,float16,fp8,0,0.014698666830857595
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,1,128,1,float16,float16,0,0.014271999398867289
TRTLLM,0.20.0,NVIDIA H200,context_attention,32,16,4,2,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,1,128,1,fp8,fp8,0,0.01642666632930438
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,2,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,2,128,1,fp8,fp8,0,0.016415999581416447
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,4,128,1,float16,float16,0,0.013744000345468521
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,4,128,1,fp8,fp8,0,0.01616000011563301
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,4,128,1,float16,fp8,0,0.014261333892742792
TRTLLM,0.20.0,NVIDIA H200,context_attention,16,16,4,2,128,1,float16,float16,0,0.01422400027513504
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,1,128,1,float16,fp8,0,0.014602666099866232
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,1,128,1,float16,float16,0,0.0138026662170887
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,1,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,2,128,1,float16,float16,0,0.013967999567588171
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,2,128,1,float16,fp8,0,0.014650666465361914
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,4,128,1,float16,float16,0,0.013354666531085968
TRTLLM,0.20.0,NVIDIA H200,context_attention,8,16,4,2,128,1,fp8,fp8,0,0.016149333367745083
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,4,128,1,float16,fp8,0,0.014021333307027817
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,4,128,1,fp8,fp8,0,0.015696000307798386
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,1,128,1,float16,fp8,0,0.014229333649079004
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,1,128,1,float16,float16,0,0.01368533323208491
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,2,128,1,float16,fp8,0,0.014298666268587112
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,2,128,1,float16,float16,0,0.013749333719412485
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,1,128,1,fp8,fp8,0,0.015824000040690105
TRTLLM,0.20.0,NVIDIA H200,context_attention,4,16,4,2,128,1,fp8,fp8,0,0.015941333025693893
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,4,128,1,float16,fp8,0,0.013429333766301474
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,4,128,1,fp8,fp8,0,0.01553600033124288
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,4,128,1,float16,float16,0,0.01333333303531011
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,1,128,1,fp8,fp8,0,0.015605332950750986
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,1,128,1,float16,fp8,0,0.014303999642531076
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,2,128,1,float16,float16,0,0.01368533323208491
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,1,128,1,float16,float16,0,0.01350933313369751
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,2,128,1,float16,fp8,0,0.0141546664138635
TRTLLM,0.20.0,NVIDIA H200,context_attention,2,16,4,2,128,1,fp8,fp8,0,0.015546667079130808
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,4,128,1,float16,float16,0,0.012981332838535309
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,4,128,1,float16,fp8,0,0.013514666507641474
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,4,128,1,fp8,fp8,0,0.015392000476519266
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,1,128,1,float16,float16,0,0.013674666484196981
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,1,128,1,float16,fp8,0,0.014106666048367819
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,2,128,1,float16,float16,0,0.013701333353916803
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,1,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,2,128,1,fp8,fp8,0,0.015530666957298914
TRTLLM,0.20.0,NVIDIA H200,context_attention,1,16,4,2,128,1,float16,fp8,0,0.01421333352724711
