framework,version,device,op_name,mla_dtype,kv_cache_dtype,batch_size,isl,tp_size,step,latency
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16,1,0,0.014538666854302088
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16,16,0,0.01239466667175293
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16,4,0,0.013082666943470636
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16,8,0,0.013306666165590286
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16,2,0,0.013354666531085968
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16,8,0,0.012810666114091873
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16,16,0,0.013013333082199097
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16,4,0,0.013290667285521826
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16,32,0,0.012319999436537424
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16,64,0,0.012186666329701742
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16,32,0,0.01209066684047381
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,32,1,0,0.015439999600251516
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,32,2,0,0.013669333110253016
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16,64,0,0.012213333199421564
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16,2,0,0.01350933313369751
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16,1,0,0.014533333480358124
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,32,4,0,0.01358933374285698
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,32,8,0,0.013125333935022354
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,32,16,0,0.012250666817029318
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,32,32,0,0.012917333592971167
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,32,64,0,0.012458667159080505
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,32,1,0,0.014570667097965876
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,32,2,0,0.013797332843144735
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,32,4,0,0.013306666165590286
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,32,8,0,0.01301866645614306
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,32,16,0,0.013525333255529404
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,32,32,0,0.012778667112191519
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,32,64,0,0.012885333349307379
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,64,1,0,0.01653333380818367
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,64,2,0,0.014698666830857595
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,64,4,0,0.014053333550691605
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,64,8,0,0.013914667069911957
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,64,32,0,0.013760000467300415
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,64,16,0,0.014127999544143677
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,64,64,0,0.013631999492645264
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,64,1,0,0.01648533344268799
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,64,2,0,0.01532799998919169
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,64,4,0,0.014655999839305878
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,64,16,0,0.014015999933083853
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,64,32,0,0.013760000467300415
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,64,8,0,0.014501333236694336
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,64,64,0,0.013797332843144735
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,128,1,0,0.019194666296243668
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,128,4,0,0.01621333385507266
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,128,32,0,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,128,8,0,0.01581866666674614
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,128,16,0,0.015322666615247726
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,128,2,0,0.016837333639462788
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,128,1,0,0.018992000569899876
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,128,64,0,0.015013333410024643
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,128,2,0,0.0161920003592968
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,128,4,0,0.016165333489576977
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,128,8,0,0.01578666642308235
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,128,16,0,0.01543466622630755
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,128,32,0,0.014965333044528961
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,128,64,0,0.01505600040157636
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,256,1,0,0.0281333327293396
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,256,4,0,0.01803733284274737
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,256,16,0,0.01692266638080279
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,256,8,0,0.017525333911180496
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,256,2,0,0.0195573332409064
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,256,32,0,0.01628799984852473
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,256,64,0,0.016048000504573185
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,256,4,0,0.017871999492247898
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,256,2,0,0.019509332875410717
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,256,1,0,0.0283146674434344
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,256,8,0,0.017653333644072216
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,256,16,0,0.016927999754746754
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,256,64,0,0.016261332978804905
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,256,32,0,0.016282666474580765
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,512,1,0,0.05610666672388712
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,512,16,0,0.020810666183630627
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,512,4,0,0.02239999920129776
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,512,8,0,0.02178666740655899
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,512,32,0,0.020282667130231857
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,512,2,0,0.03195200115442276
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,512,64,0,0.019989332805077236
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,512,2,0,0.03189333279927572
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,512,4,0,0.022639999787012737
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,512,8,0,0.021503999829292297
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,512,32,0,0.02035733312368393
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,512,64,0,0.020197333147128422
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,512,1,0,0.056261335810025535
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,512,16,0,0.020853333175182343
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1024,16,0,0.028991999725500744
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1024,32,0,0.027978666126728058
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1024,8,0,0.03012799968322118
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1024,1,0,0.12628799676895142
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1024,4,0,0.039994666973749794
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1024,2,0,0.06868266562620799
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1024,64,0,0.02794133375088374
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1024,32,0,0.028250666956106823
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1024,16,0,0.028912000358104706
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1024,8,0,0.029994666576385498
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1024,4,0,0.039408000806967415
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1024,64,0,0.027994667490323383
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1024,2,0,0.06926933427651723
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1024,1,0,0.1262079974015554
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1536,16,0,0.03746666759252548
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1536,8,0,0.039877332746982574
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1536,32,0,0.0365226666132609
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1536,64,0,0.035904000202814736
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1536,4,0,0.06450133522351582
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1536,2,0,0.12582932909329733
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,1536,1,0,0.22428800662358603
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1536,16,0,0.03774933268626531
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1536,8,0,0.039733332892258964
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1536,32,0,0.036271999279658
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1536,64,0,0.035642666121323906
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1536,4,0,0.06523733337720235
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1536,2,0,0.12787200013796488
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,1536,1,0,0.22509332497914633
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,2048,16,0,0.04569066564242045
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,2048,8,0,0.05583466589450836
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,2048,32,0,0.04446400205294291
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,2048,4,0,0.09923199812571208
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,2048,64,0,0.04334400097529093
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,2048,2,0,0.19364267587661743
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,2048,1,0,0.35100265343983966
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,2048,8,0,0.05587733288606008
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,2048,32,0,0.04428799947102865
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,2048,16,0,0.045882667104403176
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,2048,4,0,0.09734933574994405
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,2048,2,0,0.1904266675313314
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,2048,64,0,0.04347200194994608
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,2048,1,0,0.3524426619211833
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,3072,32,0,0.06029333174228668
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,3072,16,0,0.0634933312733968
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,3072,64,0,0.05914666752020518
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,3072,8,0,0.10345600048700969
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,3072,4,0,0.20702399810155234
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,3072,2,0,0.3691573143005371
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,3072,16,0,0.0639573335647583
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,3072,8,0,0.103685329357783
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,3072,32,0,0.06063466767470042
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,3072,4,0,0.20568533738454184
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,3072,64,0,0.059450666109720864
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,3072,1,0,0.6977813243865967
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,3072,2,0,0.3631306489308675
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,4096,32,0,0.07645333309968312
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,4096,8,0,0.15797332922617593
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,4096,16,0,0.0867199997107188
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,4096,64,0,0.07497600217660268
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,4096,4,0,0.3299093246459961
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,3072,1,0,0.6999306678771973
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,4096,2,0,0.6048213243484497
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,4096,8,0,0.15661866466204324
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,4096,16,0,0.08698667089144389
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,4096,32,0,0.07640533149242401
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,4096,4,0,0.3271733323733012
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,4096,64,0,0.07496533294518788
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,4096,2,0,0.5999413331349691
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,4096,1,0,1.1683786710103352
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,6144,16,0,0.17747199535369873
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,6144,8,0,0.3657279809315999
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,6144,64,0,0.10624000430107117
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,6144,32,0,0.11449066797892253
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,6144,4,0,0.6561546723047892
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,4096,1,0,1.154863993326823
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,6144,2,0,1.253450632095337
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,6144,32,0,0.11356799801190694
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,6144,16,0,0.17747733990351358
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,6144,8,0,0.36427199840545654
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,6144,64,0,0.10700800021489461
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,6144,4,0,0.6597546736399332
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,6144,2,0,1.2408640384674072
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,8192,16,0,0.2812426686286926
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,6144,1,0,2.3863040606180825
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,8192,8,0,0.6010933319727579
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,8192,4,0,1.0929919878641765
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,8192,32,0,0.14974400401115417
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,8192,64,0,0.14230400323867798
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,6144,1,0,2.437386671702067
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,8192,16,0,0.281823992729187
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,8192,8,0,0.5938186645507812
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,8192,2,0,2.1051200230916343
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,8192,32,0,0.15069333712259927
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,8192,64,0,0.14145599802335104
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,8192,4,0,1.092293341954549
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,8192,2,0,2.1333972613016763
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,10240,16,0,0.42392532030741376
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,10240,8,0,0.8890666961669922
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,8192,1,0,4.114698727925618
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,10240,64,0,0.17814934253692627
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,10240,32,0,0.23651200532913208
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,10240,4,0,1.6627519925435383
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,8192,1,0,4.409599939982097
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,10240,8,0,0.8901387055714926
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,10240,16,0,0.428117314974467
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,10240,2,0,3.2157974243164062
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,10240,64,0,0.17867199579874674
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,10240,32,0,0.2318613330523173
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,10240,4,0,1.6426560084025066
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,10240,2,0,3.18942928314209
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,12288,16,0,0.6811467011769613
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,12288,8,0,1.209439992904663
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,12288,32,0,0.326693336168925
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,12288,4,0,2.368127981821696
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,10240,1,0,6.865994771321614
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,12288,64,0,0.21180800596872965
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,12288,16,0,0.6799840132395426
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,12288,8,0,1.2310559749603271
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,10240,1,0,6.941290537516276
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,12288,2,0,4.642778714497884
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,12288,32,0,0.3250826597213745
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,12288,64,0,0.21237866083780924
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,12288,4,0,2.3503467241923013
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,12288,2,0,4.505872090657552
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16384,16,0,1.1385013262430828
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16384,8,0,2.082197348276774
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16384,32,0,0.5216853221257528
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16384,64,0,0.2781066695849101
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16384,4,0,4.27509880065918
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,12288,1,0,9.82050641377767
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16384,8,0,2.0865066846211753
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,12288,1,0,9.68515714009603
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16384,16,0,1.139957348505656
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16384,2,0,8.81277847290039
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16,1,0,0.019205333044131596
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16384,64,0,0.2765493392944336
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16,2,0,0.014698666830857595
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16384,4,0,4.0935360590616865
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16384,32,0,0.522218664487203
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16,4,0,0.014357333381970724
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16,8,0,0.0136266661187013
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16,16,0,0.013082666943470636
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16,64,0,0.012986666212479273
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16,32,0,0.012906666845083237
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16,1,0,0.018543999642133713
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16,2,0,0.015114666273196539
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16,8,0,0.013663999736309052
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16,4,0,0.0144213338692983
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16,16,0,0.01301866645614306
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16,32,0,0.012906666845083237
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16,64,0,0.013034666577974955
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,32,1,0,0.01877333347996076
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,32,2,0,0.015109332899252573
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,32,4,0,0.01422400027513504
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,32,8,0,0.013925333817799887
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,32,32,0,0.013082666943470636
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,32,16,0,0.013647999614477158
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,32,64,0,0.01312000056107839
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,32,1,0,0.018629333625237148
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,32,2,0,0.015253332753976187
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,32,4,0,0.014442666123310724
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,32,8,0,0.013973332941532135
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,32,16,0,0.013455999394257864
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16384,2,0,8.623296101888021
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,32,32,0,0.013130666067202887
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,32,64,0,0.013066666821638743
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,64,1,0,0.020346666375796
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,64,2,0,0.016517333686351776
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,64,4,0,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,64,8,0,0.014725333700577417
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,64,16,0,0.014117332796255747
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,64,32,0,0.013957332819700241
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,64,64,0,0.013893333574136099
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,64,1,0,0.020432000358899433
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,64,2,0,0.0169813334941864
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,64,4,0,0.015173333386580149
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,64,8,0,0.014335999886194864
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,64,16,0,0.014490666488806406
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,64,32,0,0.0143306665122509
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,64,64,0,0.013914667069911957
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,128,2,0,0.019589333484570186
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,128,4,0,0.016293333222468693
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,128,8,0,0.016384000579516094
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,128,1,0,0.027210667729377747
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,128,16,0,0.015872000406185787
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,1,16384,1,0,17.296229044596355
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,128,32,0,0.015637333194414776
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,128,64,0,0.015376000354687372
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,128,1,0,0.02735999971628189
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,128,2,0,0.01942933350801468
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,128,8,0,0.016255999604860943
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,128,4,0,0.016554666062196095
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,128,16,0,0.016042667130629223
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,128,32,0,0.015674666812022526
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,128,64,0,0.015130666395028433
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,256,8,0,0.018207999567190807
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,256,16,0,0.017903999735911686
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,256,4,0,0.01945066700379054
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,256,2,0,0.028384000062942505
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,256,32,0,0.017045332739750545
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,256,1,0,0.05158400038878123
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,256,64,0,0.016410666207472484
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,1,16384,1,0,17.08557383219401
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,256,4,0,0.019445333629846573
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,256,2,0,0.027930667002995808
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,256,16,0,0.018138666947682697
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,256,8,0,0.0185759998857975
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,256,64,0,0.016480000068744022
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,256,32,0,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,256,1,0,0.05152533451716105
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,512,8,0,0.02314666658639908
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,512,16,0,0.021754667162895203
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,512,32,0,0.02142400046189626
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,512,4,0,0.032272001107533775
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,512,64,0,0.020693333198626835
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,512,2,0,0.05754666527112325
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,512,1,0,0.09681066870689392
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,512,64,0,0.020448000480731327
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,512,8,0,0.02311466634273529
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,512,16,0,0.021898667017618816
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,512,32,0,0.02094399929046631
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,512,1,0,0.0944160024325053
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,512,4,0,0.03330666571855545
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,512,2,0,0.05738133192062378
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1024,64,0,0.028965334097544353
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1024,32,0,0.0296426663796107
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1024,16,0,0.030805334448814392
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1024,8,0,0.041375999649365745
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1024,4,0,0.06811200082302094
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1024,2,0,0.12607466181119284
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1024,32,0,0.029733332494894665
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1024,16,0,0.030762667457262676
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1024,1,0,0.23108800252278647
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1024,8,0,0.0409706657131513
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1024,64,0,0.028933333853880566
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1024,4,0,0.06799466907978058
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1024,1,0,0.22828267018000284
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1024,2,0,0.12727466225624084
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1536,8,0,0.06613333523273468
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1536,16,0,0.0410453329483668
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1536,32,0,0.03849600007136663
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1536,4,0,0.12808533509572348
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1536,64,0,0.03711999952793121
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1536,2,0,0.22893865903218588
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1536,32,0,0.038176000118255615
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1536,16,0,0.041152000427246094
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1536,8,0,0.06605333089828491
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1536,4,0,0.12723732988039652
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1536,64,0,0.037087999284267426
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,1536,1,0,0.4242986838022868
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1536,2,0,0.22829333941141763
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,2048,8,0,0.09995733698209126
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,2048,16,0,0.05709333221117655
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,1536,1,0,0.42977599302927655
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,2048,32,0,0.04674133161703745
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,2048,64,0,0.04534933467706045
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,2048,4,0,0.19319466749827066
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,2048,2,0,0.3559733231862386
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,2048,32,0,0.047423998514811196
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,2048,8,0,0.09835199515024821
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,2048,16,0,0.05745066702365875
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,2048,4,0,0.1965226729710897
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,2048,64,0,0.045567999283472695
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,2048,2,0,0.35787200927734375
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,2048,1,0,0.6739947001139323
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,3072,16,0,0.10577600200970967
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,3072,8,0,0.20983999967575073
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,3072,64,0,0.06259199976921082
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,3072,32,0,0.06728533407052358
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,3072,4,0,0.37335999806722003
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,2048,1,0,0.6805493036905924
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,3072,16,0,0.10597333312034607
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,3072,2,0,0.6996479829152426
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,3072,8,0,0.20898133516311646
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,3072,32,0,0.06772799789905548
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,3072,4,0,0.3665119806925456
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,3072,64,0,0.0625546673933665
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,3072,2,0,0.7046613693237305
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,4096,16,0,0.1629866659641266
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,4096,8,0,0.32523733377456665
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,3072,1,0,1.3648692766825359
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,4096,4,0,0.6026293436686198
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,4096,32,0,0.0897173285484314
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,4096,64,0,0.08027733365694682
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,3072,1,0,1.3638827006022136
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,4096,16,0,0.16242667039235434
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,4096,2,0,1.171290636062622
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,4096,8,0,0.3322559992472331
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,4096,4,0,0.6045813163121542
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,4096,32,0,0.08985599875450134
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,4096,64,0,0.07979733248551686
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,6144,16,0,0.3717546860376994
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,4096,2,0,1.1611093680063884
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,4096,1,0,2.268474737803141
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,6144,8,0,0.6568746566772461
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,6144,64,0,0.1200373371442159
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,6144,32,0,0.18357867002487183
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,6144,4,0,1.2400426864624023
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,4096,1,0,2.245584011077881
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,6144,16,0,0.3682560125986735
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,6144,8,0,0.6658240159352621
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,6144,32,0,0.18152532974878946
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,6144,2,0,2.4373226165771484
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,6144,64,0,0.11939199765523274
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,6144,4,0,1.245194673538208
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,6144,2,0,2.420794645945231
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,8192,16,0,0.5998239914576212
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,8192,8,0,1.1012427012125652
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,8192,64,0,0.1571999986966451
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,8192,32,0,0.28596266110738117
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,8192,4,0,2.1386133829752603
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,6144,1,0,5.2008107503255205
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,8192,16,0,0.6028159856796265
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,6144,1,0,4.786586761474609
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,8192,8,0,1.1107892990112305
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,8192,2,0,4.367424011230469
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,8192,32,0,0.2882506648699443
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,8192,64,0,0.15568533539772034
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,8192,4,0,2.1133119265238443
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,10240,16,0,0.8941546281178793
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,10240,8,0,1.6575946807861328
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,8192,2,0,4.5058027903238935
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,10240,32,0,0.43160001436869305
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,10240,64,0,0.24237332741419473
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,10240,4,0,3.2395359675089517
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,8192,1,0,8.976666768391928
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,10240,8,0,1.6697333653767903
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,10240,16,0,0.8984373410542806
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,10240,2,0,6.664943695068359
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,8192,1,0,8.913637161254883
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,10240,64,0,0.24321067333221436
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,10240,4,0,3.39570681254069
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,10240,32,0,0.43433066209157306
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,12288,16,0,1.2478880087534587
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,12288,8,0,2.3723626136779785
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,10240,2,0,6.800341288248698
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,12288,32,0,0.6893333594004313
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,12288,64,0,0.3346560001373291
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,12288,4,0,4.630938529968262
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,10240,1,0,13.795696258544922
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,12288,8,0,2.358762741088867
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,12288,16,0,1.2457280158996582
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,12288,4,0,4.623674710591634
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,12288,2,0,9.697578430175781
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,12288,32,0,0.7010773022969564
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,12288,64,0,0.336026668548584
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,10240,1,0,13.889717102050781
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,12288,2,0,9.944698969523111
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16384,16,0,2.0926987330118814
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16384,8,0,4.049450556437175
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16384,32,0,1.1480533281962078
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16384,64,0,0.5375306606292725
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16384,4,0,8.6626345316569
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,12288,1,0,19.33019256591797
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16384,8,0,4.229157447814941
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16384,16,0,2.0729494094848633
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,12288,1,0,19.531504313151043
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16384,2,0,17.066661834716797
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16384,64,0,0.5308746496836344
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16384,32,0,1.1489280064900715
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16384,4,0,8.873002370198568
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,16,1,0,0.025909334421157837
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,16,2,0,0.018432000031073887
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,16,4,0,0.015034666905800501
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,16,8,0,0.01402666668097178
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,16,16,0,0.01358933374285698
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,16,32,0,0.013242666920026144
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,16,64,0,0.01314666618903478
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,16,1,0,0.025253333151340485
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,16,2,0,0.018325333793958027
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,16,4,0,0.01524266724785169
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,16,8,0,0.014096000542243322
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,16,16,0,0.01350933313369751
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,16,32,0,0.013301332791646322
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,16,64,0,0.0129120002190272
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,32,4,0,0.015344000111023584
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,32,2,0,0.018602666755517323
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,32,1,0,0.02548266698916753
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,32,8,0,0.014256000518798828
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,32,16,0,0.013541333377361298
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,32,32,0,0.013610667238632837
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,32,64,0,0.013269333789745966
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,32,1,0,0.025487999121348064
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,32,2,0,0.01830400029818217
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,32,4,0,0.015317333241303762
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,32,8,0,0.014229333649079004
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,32,16,0,0.013834666460752487
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,32,32,0,0.01328533391157786
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,32,64,0,0.01320533330241839
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,64,2,0,0.02073066681623459
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,64,4,0,0.016586666305859882
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,64,1,0,0.029733332494894665
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,64,8,0,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,64,16,0,0.014698666830857595
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,64,32,0,0.014485333114862442
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,64,64,0,0.01440000037352244
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,64,1,0,0.029663999875386555
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16384,2,0,17.32086944580078
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,64,2,0,0.02056533346573512
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,64,4,0,0.016506666938463848
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,64,8,0,0.015141333142916361
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,64,16,0,0.01471466695268949
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,64,32,0,0.014111999422311783
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,64,64,0,0.01431999976436297
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,128,2,0,0.02755733331044515
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,128,4,0,0.01929066702723503
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,128,1,0,0.049413333336512245
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,128,8,0,0.017162666966517765
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,128,16,0,0.016000000139077503
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,128,32,0,0.01579733317097028
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,128,64,0,0.01595199977358182
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,128,4,0,0.019381333142518997
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,128,2,0,0.02812800059715907
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,128,1,0,0.04976533353328705
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,128,8,0,0.016565332810084026
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,128,16,0,0.016234666109085083
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,128,32,0,0.015872000406185787
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,128,64,0,0.015664000064134598
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,256,8,0,0.020314666132132213
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,256,4,0,0.02889599899450938
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,256,16,0,0.018874666343132656
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,256,2,0,0.05230399966239929
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,256,32,0,0.018079999834299088
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,256,1,0,0.0792746643225352
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,256,64,0,0.01709866647919019
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,256,4,0,0.028922667105992634
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,256,8,0,0.02004266654451688
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,256,2,0,0.052906667192777
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,256,16,0,0.018768000106016796
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,256,32,0,0.017845333864291508
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,256,1,0,0.08011733492215474
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,256,64,0,0.0169813334941864
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,512,16,0,0.023520000278949738
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,512,8,0,0.03374933451414108
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,512,32,0,0.022613334159056347
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,512,4,0,0.05723733206590017
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,512,2,0,0.09302933017412822
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,512,64,0,0.021386665602525074
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,512,1,0,0.170799990495046
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,512,8,0,0.03321066747109095
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,512,4,0,0.05735999842484792
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,512,2,0,0.09393067161242168
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,512,16,0,0.023936000963052113
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,2,16384,1,0,34.04227193196615
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,512,32,0,0.022495999932289124
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,512,64,0,0.02141333371400833
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,512,1,0,0.1722453236579895
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1024,16,0,0.04293333490689596
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1024,8,0,0.07083733379840851
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1024,32,0,0.032170665760835014
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1024,4,0,0.1288800040880839
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1024,64,0,0.03073066721359889
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1024,2,0,0.23199466864267984
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1024,4,0,0.12662933270136514
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1024,8,0,0.07057066758473714
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1024,16,0,0.04250133534272512
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1024,32,0,0.032314665615558624
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1024,64,0,0.030693332354227703
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1024,2,0,0.23124800125757852
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1024,1,0,0.44044800599416095
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1536,32,0,0.04446400205294291
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1536,16,0,0.06860800087451935
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1536,8,0,0.1300159990787506
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1024,1,0,0.4427146514256795
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1536,4,0,0.22926400105158487
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1536,64,0,0.039936001102129616
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1536,2,0,0.43175466855367023
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,2,16384,1,0,34.32105509440104
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1536,8,0,0.12995200355847678
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1536,16,0,0.06817066669464111
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1536,32,0,0.045066664616266884
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1536,64,0,0.04026666780312856
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1536,4,0,0.229967991511027
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1536,2,0,0.4246079921722412
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,2048,16,0,0.10312533378601074
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,1536,1,0,0.8429653644561768
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,2048,8,0,0.1988746722539266
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,2048,32,0,0.06001600126425425
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,2048,64,0,0.05027733246485392
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,2048,4,0,0.36085331439971924
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,1536,1,0,0.8304479916890463
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,2048,2,0,0.6850720246632894
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,2048,16,0,0.10381333033243816
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,2048,8,0,0.1986453334490458
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,2048,32,0,0.06012799839178721
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,2048,64,0,0.05049066742261251
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,2048,4,0,0.35391465822855633
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,2048,2,0,0.6903519630432129
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,3072,16,0,0.21262933810551962
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,3072,8,0,0.37489068508148193
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,2048,1,0,1.3349013328552246
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,3072,32,0,0.11136533816655476
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,3072,64,0,0.07208533088366191
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,3072,4,0,0.7029279867808024
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,2048,1,0,1.3545066515604656
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,3072,16,0,0.21273066600163779
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,3072,8,0,0.37118399143218994
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,3072,2,0,1.3713493347167969
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,3072,64,0,0.07217599948247273
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,3072,32,0,0.11191466450691223
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,3072,4,0,0.7126666704813639
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,4096,16,0,0.3401493231455485
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,3072,2,0,1.362053394317627
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,4096,8,0,0.6172800064086914
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,4096,32,0,0.16925867398579916
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,4096,64,0,0.09596799810727437
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,4096,4,0,1.1653760274251301
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,3072,1,0,2.790805180867513
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,4096,16,0,0.3386186758677165
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,4096,8,0,0.6088320016860962
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,4096,2,0,2.2936320304870605
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,4096,32,0,0.16980266571044922
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,3072,1,0,2.7205066680908203
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,4096,4,0,1.1628586451212566
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,4096,64,0,0.09674666325251262
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,6144,16,0,0.6685600280761719
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,4096,2,0,2.2992053031921387
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,6144,8,0,1.247754653294881
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,6144,32,0,0.3755040168762207
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,6144,64,0,0.19299199183781943
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,4096,1,0,4.4635467529296875
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,6144,4,0,2.4363840421040854
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,6144,16,0,0.6689173380533854
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,6144,8,0,1.237888018290202
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,4096,1,0,4.919407844543457
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,6144,32,0,0.37569598356882733
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,6144,64,0,0.19082132975260416
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,6144,4,0,2.4500479698181152
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,6144,2,0,5.171221415201823
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,8192,16,0,1.1224106947580974
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,8192,8,0,2.1389919916788735
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,6144,2,0,5.119333267211914
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,8192,32,0,0.6138879855473837
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,8192,64,0,0.30055999755859375
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,8192,4,0,4.138298670450847
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,6144,1,0,10.41433588663737
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,8192,8,0,2.098288059234619
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,8192,16,0,1.1139307022094727
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,8192,32,0,0.6121813456217448
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,8192,4,0,4.458799997965495
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,6144,1,0,10.415632247924805
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,8192,2,0,8.958229064941406
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,8192,64,0,0.30132800340652466
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,16,2,0,0.02481599897146225
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,16,1,0,0.037733333806196846
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,16,4,0,0.01844266677896182
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,16,8,0,0.015301333119471868
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,16,16,0,0.01413333291808764
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,16,64,0,0.013034666577974955
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,16,32,0,0.013616000612576803
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,16,1,0,0.039120001097520195
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,16,2,0,0.024469333390394848
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,16,4,0,0.018464000274737675
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,16,8,0,0.015082667271296183
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,16,16,0,0.013999999811251959
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,16,32,0,0.013546666751305262
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,16,64,0,0.013349333157142004
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,32,8,0,0.014917333920796713
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,32,4,0,0.018330667167901993
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,32,2,0,0.024847999215126038
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,32,1,0,0.04042666653792063
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,32,16,0,0.014149333039919535
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,32,32,0,0.013749333719412485
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,32,64,0,0.013642666240533194
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,32,1,0,0.039520000418027244
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,32,2,0,0.024933333198229473
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,32,4,0,0.018090666582187016
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,32,8,0,0.015061333775520325
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,32,16,0,0.01403733342885971
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,32,64,0,0.013536000003417334
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,32,32,0,0.013797332843144735
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,64,1,0,0.04981866478919983
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,64,2,0,0.030058667063713074
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,64,4,0,0.020666666328907013
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,8192,2,0,9.10263442993164
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,64,16,0,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,64,8,0,0.016656000167131424
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,64,32,0,0.014954666296641031
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,64,64,0,0.014394666999578476
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,64,8,0,0.01659199967980385
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,64,4,0,0.020634666085243225
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,64,2,0,0.03121600051720937
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,64,1,0,0.04974933465321859
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,64,32,0,0.014778666198253632
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,64,16,0,0.015194666882356008
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,64,64,0,0.014650666465361914
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,128,4,0,0.028864001234372456
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,128,16,0,0.01692266638080279
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,128,8,0,0.019776000330845516
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,128,2,0,0.04872000217437744
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,128,1,0,0.08396266897519429
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,128,32,0,0.016714667280515034
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,128,64,0,0.01617066686352094
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,128,4,0,0.028533334533373516
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,128,8,0,0.019733333339293797
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,128,2,0,0.04844266672929128
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,128,16,0,0.01710933322707812
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,128,32,0,0.016682667036851246
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,128,1,0,0.08500799536705017
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,128,64,0,0.01605333387851715
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,256,4,0,0.052928000688552856
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,256,16,0,0.020319999506076176
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,256,8,0,0.030506665507952373
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,256,2,0,0.07949866851170857
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,256,32,0,0.019354666272799175
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,256,64,0,0.01858666663368543
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,256,1,0,0.14289066195487976
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,256,8,0,0.030224000414212544
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,256,4,0,0.0524533341328303
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,256,16,0,0.02040533348917961
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,256,2,0,0.08007466793060303
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,256,32,0,0.019589333484570186
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,256,64,0,0.018570666511853535
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,256,1,0,0.14361066619555155
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,512,8,0,0.06001066664854685
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,4,8192,1,0,18.072554270426433
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,512,16,0,0.0354720006386439
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,512,32,0,0.024725332856178284
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,512,4,0,0.09507733583450317
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,512,64,0,0.02346666653951009
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,512,2,0,0.1727679967880249
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,512,8,0,0.059989333152770996
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,512,16,0,0.03562133262554804
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,512,32,0,0.024847999215126038
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,512,4,0,0.09483733773231506
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,512,1,0,0.3235413432121277
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,512,64,0,0.02366400013367335
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,512,2,0,0.17353065808614096
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1024,32,0,0.04625066618124644
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1024,16,0,0.07382933298746745
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1024,8,0,0.13214932878812155
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,512,1,0,0.32625067234039307
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1024,64,0,0.03624533365170161
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1024,4,0,0.23471999168395996
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,4,8192,1,0,18.054378509521484
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1024,16,0,0.07334933181603749
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1024,2,0,0.44439999262491864
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1024,8,0,0.13268799583117166
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1024,32,0,0.04671466847260793
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1024,4,0,0.23669334252675375
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1024,64,0,0.036133334040641785
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1024,2,0,0.4432479937871297
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1536,16,0,0.1357973317305247
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1536,8,0,0.23590399821599325
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1536,32,0,0.07332799832026164
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1024,1,0,0.8571626345316569
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1536,64,0,0.05031999945640564
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1536,4,0,0.4317760070164998
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1536,16,0,0.13583466410636902
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1024,1,0,0.8747413158416748
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1536,8,0,0.23443732659022012
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1536,2,0,0.8463679949442545
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1536,64,0,0.04971200227737427
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1536,32,0,0.07351466516653697
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1536,4,0,0.42863468329111737
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,2048,16,0,0.2034133275349935
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1536,2,0,0.8362080256144205
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,2048,8,0,0.3629066546758016
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,2048,32,0,0.1083733340104421
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,2048,64,0,0.06710400183995564
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,2048,4,0,0.6975999673207601
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,1536,1,0,1.6510240236918132
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,2048,16,0,0.20507200558980307
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,2048,8,0,0.3682933251063029
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,2048,2,0,1.360090732574463
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,1536,1,0,1.6653547286987305
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,2048,64,0,0.06704533100128174
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,2048,32,0,0.10950932900110881
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,2048,4,0,0.6883947054545084
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,3072,16,0,0.383786678314209
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,2048,2,0,1.3304533163706462
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,3072,8,0,0.714949369430542
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,3072,32,0,0.2218453288078308
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,3072,64,0,0.1186346709728241
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,2048,1,0,2.675498644510905
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,3072,4,0,1.3886933326721191
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,3072,16,0,0.3866186539332072
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,3072,8,0,0.7144213517506918
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,2048,1,0,2.6612106959025064
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,3072,32,0,0.2241333325703939
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,3072,4,0,1.3604480425516765
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,3072,64,0,0.11939199765523274
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,3072,2,0,2.7774880727132163
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,4096,16,0,0.6293226480484009
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,4096,8,0,1.1797813574473064
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,3072,2,0,2.743258794148763
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,4096,32,0,0.34967466195424396
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,4096,64,0,0.18067200978597006
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,4096,4,0,2.2825600306193032
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,3072,1,0,5.681018829345703
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,4096,8,0,1.163157304128011
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,4096,16,0,0.628111998240153
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,4096,32,0,0.3447146813074748
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,4096,4,0,2.312533378601074
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,4096,2,0,4.630842526753743
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,4096,64,0,0.181551992893219
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,16,1,0,0.06087466577688853
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,3072,1,0,5.546522776285808
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,16,2,0,0.036570665736993156
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,16,4,0,0.024400000770886738
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,16,16,0,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,16,8,0,0.018042666216691334
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,16,32,0,0.014111999422311783
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,16,64,0,0.013536000003417334
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,16,1,0,0.060405333836873375
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,16,2,0,0.03634133438269297
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,16,4,0,0.024480000138282776
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,16,8,0,0.017770666629076004
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,16,16,0,0.014970666418472925
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,16,64,0,0.013552000125249227
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,16,32,0,0.014149333039919535
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,32,1,0,0.06694933275381725
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,32,4,0,0.024853333830833435
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,32,2,0,0.03955733279387156
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,32,8,0,0.01836799954374631
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,32,16,0,0.015002666662136713
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,32,32,0,0.014256000518798828
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,32,64,0,0.013983999689420065
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,32,2,0,0.039919999738534294
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,32,4,0,0.02510933329661687
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,32,1,0,0.06651199857393901
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,32,8,0,0.0183999997874101
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,32,16,0,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,32,32,0,0.014362666755914688
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,32,64,0,0.01402666668097178
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,4096,2,0,4.879557291666667
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,64,8,0,0.02077866718173027
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,64,4,0,0.030229332546393078
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,64,16,0,0.01681600014368693
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,64,2,0,0.0507893313964208
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,64,32,0,0.015552000453074774
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,64,1,0,0.08610666791598003
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,64,64,0,0.015365333606799444
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,64,16,0,0.016997333616018295
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,64,8,0,0.020762667059898376
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,64,4,0,0.03044266750415166
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,64,2,0,0.05017599960168203
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,64,32,0,0.015557333827018738
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,64,1,0,0.08577066659927368
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,64,64,0,0.015018666783968607
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,128,8,0,0.029930666089057922
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,128,4,0,0.049226666490236916
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,128,16,0,0.02014933278163274
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,128,32,0,0.018320000420014065
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,128,2,0,0.08516800403594971
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,128,64,0,0.017562666287024815
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,128,1,0,0.16081066926320395
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,128,8,0,0.029546665648619335
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,128,4,0,0.049695998430252075
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,128,16,0,0.019904000063737232
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,128,2,0,0.0846613347530365
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,128,32,0,0.01754666616519292
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,128,64,0,0.01759999990463257
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,128,1,0,0.1567626694838206
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,256,8,0,0.05462933580080668
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,256,16,0,0.03254399945338567
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,256,4,0,0.08073066671689351
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,256,32,0,0.022240000466505688
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,8,4096,1,0,9.887306849161783
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,256,64,0,0.020341333001852036
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,256,2,0,0.14473066727320352
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,256,8,0,0.05499200026194254
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,256,16,0,0.03260799994071325
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,256,32,0,0.022101332743962605
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,256,4,0,0.08125866452852885
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,256,1,0,0.2715466618537903
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,256,64,0,0.0206133338312308
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,256,2,0,0.14526933431625366
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,512,16,0,0.06448000172773997
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,512,32,0,0.039077334105968475
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,512,8,0,0.09849599997202556
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,256,1,0,0.27246934175491333
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,512,4,0,0.17618666092554727
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,512,64,0,0.02870933214823405
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,8,4096,1,0,9.820213317871094
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,512,16,0,0.06462400158246358
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,512,8,0,0.10032533605893452
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,512,2,0,0.3253866632779439
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,512,4,0,0.17697066068649292
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,512,32,0,0.039477333426475525
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,512,64,0,0.028592000404993694
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,512,2,0,0.32709866762161255
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1024,16,0,0.13884266217549643
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1024,32,0,0.08089600006739299
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1024,8,0,0.2416693369547526
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,512,1,0,0.6388426621754965
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1024,64,0,0.05356266597906748
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1024,4,0,0.45472534497578937
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,512,1,0,0.631717324256897
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1024,8,0,0.24170666933059692
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1024,16,0,0.13986133535703024
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1024,32,0,0.08055466910203297
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1024,64,0,0.05367999772230784
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1024,2,0,0.8655839761098226
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1024,4,0,0.4556159973144531
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1536,16,0,0.24452267090479532
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1024,2,0,0.873189369837443
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1536,8,0,0.4380799929300944
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1536,32,0,0.14545067151387533
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1536,64,0,0.08319466809431712
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1536,4,0,0.8445066610972086
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1024,1,0,1.7492052714029949
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1536,16,0,0.24479466676712036
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1536,8,0,0.4413226842880249
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1024,1,0,1.7402933438618977
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1536,2,0,1.6598079999287922
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1536,64,0,0.08356799681981404
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1536,4,0,0.8440319697062174
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1536,32,0,0.14525866508483887
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,2048,16,0,0.37908267974853516
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,2048,8,0,0.7013813654581705
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1536,2,0,1.6786400477091472
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,2048,32,0,0.21689067284266153
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,2048,64,0,0.12120532989501953
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,2048,4,0,1.3542505900065105
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,1536,1,0,3.3784427642822266
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,2048,8,0,0.7011199792226156
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,2048,16,0,0.3792693217595418
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,2048,4,0,1.3523999849955242
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,2048,64,0,0.12220266461372375
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,1536,1,0,3.268453280131022
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,2048,32,0,0.21497066815694174
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,2048,2,0,2.744943936665853
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,16,2,0,0.06071466704209646
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,16,1,0,0.1076746682325999
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,16,4,0,0.03625066578388214
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,16,8,0,0.02388266722361247
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,16,16,0,0.0182239996890227
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,16,32,0,0.014901333798964819
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,16,64,0,0.014208000153303146
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,16,2,0,0.0613013356924057
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,16,4,0,0.03565866748491923
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,16,1,0,0.1069599986076355
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,16,8,0,0.0239680012067159
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,16,16,0,0.017802666872739792
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,16,32,0,0.014917333920796713
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,16,64,0,0.014165333161751429
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,32,4,0,0.03965866565704346
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,32,2,0,0.06801066795984904
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,32,16,0,0.018794666975736618
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,32,8,0,0.02497066557407379
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,32,1,0,0.12317333618799846
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,32,32,0,0.015466666469971338
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,32,64,0,0.014650666465361914
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,32,4,0,0.03979199876387914
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,32,2,0,0.06772799789905548
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,32,16,0,0.018778666853904724
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,32,8,0,0.02508266766866048
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,32,1,0,0.12341333429018657
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,2048,2,0,2.709967931111654
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,32,32,0,0.015546667079130808
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,32,64,0,0.014720000326633453
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,64,4,0,0.050613333781560264
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,64,16,0,0.021733333667119343
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,64,32,0,0.017946666727463405
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,64,8,0,0.03249600032965342
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,64,2,0,0.08654399712880452
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,64,64,0,0.01637866720557213
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,64,1,0,0.1590666671593984
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,64,8,0,0.03188266605138779
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,64,4,0,0.050741334756215416
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,64,16,0,0.021530665457248688
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,64,2,0,0.08705600102742513
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,64,32,0,0.017893332988023758
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,64,64,0,0.016303999970356624
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,64,1,0,0.15837867061297098
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,128,16,0,0.031845333675543465
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,128,8,0,0.05089066425959269
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,128,4,0,0.08728532989819844
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,128,32,0,0.021717332303524017
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,128,64,0,0.01982933282852173
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,128,2,0,0.15639999508857727
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,128,16,0,0.0306986669699351
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,128,4,0,0.08662933111190796
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,128,8,0,0.05022400120894114
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,128,1,0,0.2963680028915405
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,128,64,0,0.019519999623298645
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,128,2,0,0.15664000312487283
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,128,32,0,0.021967999637126923
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,16,2048,1,0,5.685125350952148
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,128,1,0,0.2969333330790202
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,256,16,0,0.05829333265622457
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,256,8,0,0.08460799853006999
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,256,32,0,0.03724266588687897
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,256,64,0,0.026394667724768322
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,256,4,0,0.1474453310171763
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,256,16,0,0.05932799975077311
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,256,2,0,0.27305599053700763
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,256,8,0,0.08503466844558716
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,256,32,0,0.03748266647259394
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,256,4,0,0.14783466855684915
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,256,64,0,0.02554133286078771
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,16,2048,1,0,5.465722401936849
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,256,2,0,0.2717120051383972
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,256,1,0,0.522325317064921
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,512,32,0,0.07197333375612895
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,512,8,0,0.18293333053588867
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,512,16,0,0.10726400216420491
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,512,64,0,0.047269334395726524
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,512,4,0,0.33347201347351074
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,256,1,0,0.523914655049642
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,512,32,0,0.07195733487606049
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,512,8,0,0.18238399426142374
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,512,16,0,0.10550933082898457
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,512,64,0,0.0468800018231074
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,512,4,0,0.3352320194244385
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,512,2,0,0.6334559917449951
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,1024,16,0,0.25274133682250977
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,512,2,0,0.6451093355814616
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,1024,8,0,0.46693865458170575
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,1024,32,0,0.14993600050608316
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,1024,64,0,0.09380799531936646
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,512,1,0,1.2774293422698975
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,1024,4,0,0.8856586615244547
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,512,1,0,1.263541301091512
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,1024,16,0,0.25411200523376465
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,1024,8,0,0.4651999870936076
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,1024,32,0,0.15427733461062113
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,1024,64,0,0.09310400485992432
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,16,2,0,0.10885333021481831
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,1024,4,0,0.8930506706237793
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,16,4,0,0.06149866680304209
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,16,1,0,0.23123733202616373
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,16,8,0,0.0359199990828832
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,1024,2,0,1.7462080319722493
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,16,16,0,0.024485332270463307
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,16,32,0,0.018239999810854595
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,16,64,0,0.015135999768972397
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,16,16,0,0.024154665569464367
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,16,8,0,0.03573866685231527
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,16,4,0,0.061093335350354515
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,16,2,0,0.1106719970703125
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,16,1,0,0.23509865999221802
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,16,32,0,0.01836266616980235
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,16,64,0,0.015109332899252573
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,32,4,0,0.06822933256626129
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,32,16,0,0.026127999027570088
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,32,8,0,0.041402667760849
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,32,2,0,0.12320533394813538
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,32,32,0,0.01926933353145917
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,32,64,0,0.016117333124081295
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,32,1,0,0.23821866512298584
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,32,16,0,0.02587733417749405
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,32,2,0,0.12410666545232137
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,32,8,0,0.041050667564074196
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,32,4,0,0.06830400228500366
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,32,1,0,0.24012267589569092
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,32,64,0,0.01609066625436147
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,1024,2,0,1.7213652928670247
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,32,32,0,0.019402666638294857
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,64,32,0,0.022789334257443745
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,64,16,0,0.03518400092919668
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,64,8,0,0.05258666475613912
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,64,4,0,0.08879466851552327
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,64,64,0,0.019098666807015736
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,64,2,0,0.16132799784342447
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,64,16,0,0.03398400048414866
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,64,8,0,0.05249600112438202
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,64,4,0,0.08842133482297261
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,64,1,0,0.30371199051539105
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,64,32,0,0.0227360005180041
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,64,2,0,0.16083199779192606
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,64,64,0,0.01907733331123988
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,128,16,0,0.05426666637261709
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,64,1,0,0.3013333280881246
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,128,8,0,0.09057066837946574
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,128,64,0,0.02609066665172577
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,128,32,0,0.03642133375008901
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,128,4,0,0.16086933016777039
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,32,1024,1,0,3.442821184794108
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,128,2,0,0.29685866832733154
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,128,8,0,0.09118400017420451
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,128,4,0,0.16098666191101074
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,128,32,0,0.03623999903599421
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,128,16,0,0.0544053316116333
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,128,64,0,0.025429333249727886
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,128,2,0,0.2946666677792867
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,128,1,0,0.5759626626968384
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,256,8,0,0.15432000160217285
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,256,16,0,0.09147199988365173
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,256,32,0,0.06835733354091644
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,256,64,0,0.04545066754023234
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,128,1,0,0.5750986735026041
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,256,4,0,0.27622934182484943
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,32,1024,1,0,3.596853256225586
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,256,16,0,0.09148800373077393
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,256,32,0,0.0673333356777827
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,256,8,0,0.1548533340295156
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,256,64,0,0.04558399816354116
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,256,2,0,0.5248426596323649
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,256,4,0,0.2765600085258484
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,512,16,0,0.19491199652353922
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,256,2,0,0.5285226504007975
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,512,8,0,0.34436265627543133
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,512,32,0,0.11741333206494649
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,512,64,0,0.08587200442949931
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,256,1,0,1.0451200008392334
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,512,4,0,0.648197333017985
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,256,1,0,1.0385599931081135
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,512,16,0,0.1941386659940084
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,512,8,0,0.34815998872121173
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,512,64,0,0.0851039985815684
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,512,32,0,0.12230400244394939
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,512,4,0,0.654965321222941
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,16,4,0,0.10939733187357585
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,16,2,0,0.23171732823053995
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,16,1,0,0.46375465393066406
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,16,16,0,0.03640000025431315
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,512,2,0,1.2769280274709065
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,16,8,0,0.062463998794555664
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,16,32,0,0.02473066747188568
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,16,64,0,0.019018666197856266
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,16,8,0,0.06217066446940104
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,16,4,0,0.11013333002726237
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,16,16,0,0.03638399889071783
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,16,2,0,0.23360532522201538
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,16,32,0,0.024746666351954143
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,16,64,0,0.019088000059127808
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,16,1,0,0.4599733352661133
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,32,16,0,0.04343999922275543
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,32,8,0,0.07172266642252605
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,32,4,0,0.12537599603335062
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,32,2,0,0.24490666389465332
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,32,32,0,0.0273333340883255
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,32,64,0,0.02073066681623459
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,512,2,0,1.272432009379069
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,32,1,0,0.48947731653849286
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,32,8,0,0.07172266642252605
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,32,4,0,0.12546666463216147
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,32,16,0,0.04494399825731913
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,32,32,0,0.026933332284291584
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,32,2,0,0.24555200338363647
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,32,64,0,0.02072000006834666
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,32,1,0,0.4887626568476359
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,64,16,0,0.05671466886997223
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,64,8,0,0.09243733684221904
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,64,32,0,0.03825599948565165
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,64,4,0,0.1634773313999176
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,64,64,0,0.02661866694688797
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,64,2,0,0.3076159954071045
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,64,8,0,0.09215466181437175
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,64,4,0,0.16337066888809204
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,64,16,0,0.05677866439024607
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,64,32,0,0.03823466598987579
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,64,64,0,0.026608000199000042
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,64,512,1,0,2.551898638407389
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,64,2,0,0.30955199400583905
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,64,1,0,0.590170661608378
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,128,16,0,0.09794132908185323
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,128,32,0,0.06270933151245117
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,128,8,0,0.17022399107615152
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,64,1,0,0.5922026634216309
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,128,64,0,0.045093332727750145
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,128,4,0,0.30694399277369183
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,64,512,1,0,2.536186695098877
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,128,8,0,0.1676479975382487
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,128,16,0,0.09712533156077068
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,128,2,0,0.5748480161031088
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,128,64,0,0.04450133442878723
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,128,32,0,0.06317333380381267
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,128,4,0,0.3076639970143636
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,256,16,0,0.16689066092173258
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,128,2,0,0.5832693179448446
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,256,8,0,0.28860799471537274
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,256,32,0,0.10377599795659383
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,256,64,0,0.0804319977760315
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,128,1,0,1.1464266777038574
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,256,4,0,0.5346346696217855
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,128,1,0,1.1482880115509033
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,256,16,0,0.16685867309570312
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,256,8,0,0.28887999057769775
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,256,32,0,0.10880000392595927
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,256,64,0,0.0798826664686203
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,256,4,0,0.5374559958775839
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,16,4,0,0.23337600628534952
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,16,2,0,0.4646986722946167
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,16,8,0,0.1130506694316864
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,256,2,0,1.0501226584116619
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,16,16,0,0.06490133206049602
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,16,1,0,0.9371893405914307
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,16,32,0,0.038245332737763725
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,16,64,0,0.026522666215896606
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,16,16,0,0.06490133206049602
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,16,8,0,0.11249066392580669
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,16,4,0,0.23891733090082803
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,16,32,0,0.03825066735347112
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,16,2,0,0.46906133492787677
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,16,64,0,0.026314665873845417
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,16,1,0,0.9336427052815756
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,32,8,0,0.12717866897583008
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,256,2,0,1.046613295873006
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,32,16,0,0.07493866483370464
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,32,4,0,0.24782933791478476
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,32,32,0,0.047482664386431374
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,32,64,0,0.03189333279927572
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,32,2,0,0.507482647895813
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,32,8,0,0.12825600306193033
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,32,4,0,0.2485546668370565
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,32,16,0,0.0755626658598582
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,32,32,0,0.04721599817276001
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,32,64,0,0.031311998764673867
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,32,2,0,0.49748798211415607
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,32,1,0,0.985642671585083
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,64,16,0,0.0997759997844696
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,64,8,0,0.17174400885899863
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,64,32,0,0.06489600241184235
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,128,256,1,0,2.0780693689982095
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,32,1,0,0.9858240286509196
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,64,64,0,0.047093331813812256
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,64,4,0,0.3113386631011963
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,64,16,0,0.09913600484530131
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,64,8,0,0.1713226636250814
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,64,32,0,0.06446933249632518
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,64,2,0,0.6023946603139242
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,64,64,0,0.04690133531888326
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,64,4,0,0.31330666939417523
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,128,256,1,0,2.0704213778177896
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,64,2,0,0.6095733245213827
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,128,16,0,0.1816213329633077
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,128,8,0,0.3115520079930623
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,128,32,0,0.11150399843851726
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,128,64,0,0.07707733412583669
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,64,1,0,1.1928479671478271
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,128,4,0,0.6020586490631104
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,64,1,0,1.1827253500620525
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,128,8,0,0.3192053238550822
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,128,16,0,0.18037333091100058
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,128,32,0,0.11569066842397054
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,128,64,0,0.08055999875068665
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,128,4,0,0.6012639999389648
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,128,2,0,1.135269323984782
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,128,2,0,1.1384533246358235
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,fp8,256,128,1,0,2.2798026402791343
TRTLLM,0.20.0,NVIDIA H200,mla_context,float16,float16,256,128,1,0,2.284325281778971
