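# Format: one "<source label>,<relative path>" entry per line; lines starting with "#" are section comments.
# Each path contains a literal {TOKENIZER} placeholder (presumably replaced with the tokenizer identifier by the loader; confirm).

# ProofPile 2: Algebraic Stack Data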
proofpile-2-stack,preprocessed/700B_olmo2_sample/algebraic_stack/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-0-00000.npy
proofpile-2-stack,preprocessed/700B_olmo2_sample/algebraic_stack/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-1-00000.npy
proofpile-2-stack,preprocessed/700B_olmo2_sample/algebraic_stack/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-2-00000.npy
proofpile-2-stack,preprocessed/700B_olmo2_sample/algebraic_stack/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-3-00000.npy
proofpile-2-stack,preprocessed/700B_olmo2_sample/algebraic_stack/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-4-00000.npy
proofpile-2-stack,preprocessed/700B_olmo2_sample/algebraic_stack/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-5-00000.npy
proofpile-2-stack,preprocessed/700B_olmo2_sample/algebraic_stack/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-6-00000.npy

# ProofPile 2: Arxiv Data
proofpile-2-arxiv,preprocessed/700B_olmo2_sample/arxiv/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-0-00000.npy
proofpile-2-arxiv,preprocessed/700B_olmo2_sample/arxiv/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-1-00000.npy
proofpile-2-arxiv,preprocessed/700B_olmo2_sample/arxiv/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-2-00000.npy
proofpile-2-arxiv,preprocessed/700B_olmo2_sample/arxiv/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-3-00000.npy
proofpile-2-arxiv,preprocessed/700B_olmo2_sample/arxiv/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-4-00000.npy

# ProofPile 2: Open Web Math Data
proofpile-2-open-web-math,preprocessed/700B_olmo2_sample/owm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-0-00000.npy
proofpile-2-open-web-math,preprocessed/700B_olmo2_sample/owm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-1-00000.npy
proofpile-2-open-web-math,preprocessed/700B_olmo2_sample/owm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-2-00000.npy
proofpile-2-open-web-math,preprocessed/700B_olmo2_sample/owm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-3-00000.npy

# Pes2o Data
pes2o,preprocessed/700B_olmo2_sample/pes2o/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-0-00000.npy
pes2o,preprocessed/700B_olmo2_sample/pes2o/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-1-00000.npy
pes2o,preprocessed/700B_olmo2_sample/pes2o/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-2-00000.npy
pes2o,preprocessed/700B_olmo2_sample/pes2o/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-3-00000.npy
pes2o,preprocessed/700B_olmo2_sample/pes2o/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-4-00000.npy

# Starcoder Data (fixed! NOTE(review): record what was fixed in these paths and when)
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-00-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-01-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-02-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-03-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-04-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-05-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-06-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-07-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-08-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-09-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-10-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-11-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-12-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-13-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-14-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-15-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-16-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-17-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-18-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-19-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-20-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-21-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-22-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-23-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-24-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-25-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-26-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-27-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-28-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-29-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-30-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-31-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-32-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-33-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-34-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-35-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-36-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-37-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-38-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-39-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-40-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-41-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-42-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-43-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-44-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-45-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-46-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-47-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-48-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-49-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-50-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-51-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-52-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-53-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-54-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-55-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-56-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-57-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-58-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-59-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-60-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-61-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-62-00000.npy
starcoder,preprocessed/700B_olmo2_sample/starcoder/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-63-00000.npy

# DCLM Data
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-00-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-00-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-01-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-01-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-02-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-02-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-03-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-03-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-04-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-04-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-05-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-05-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-06-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-06-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-07-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-07-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-08-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-08-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-09-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-09-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-10-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-10-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-11-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-11-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-12-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-12-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-13-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-13-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-14-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-14-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-15-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-15-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-16-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-16-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-17-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-17-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-18-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-18-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-19-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-19-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-20-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-20-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-21-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-21-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-22-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-22-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-23-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-23-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-24-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-24-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-25-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-25-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-26-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-26-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-27-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-27-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-28-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-28-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-29-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-29-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-30-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-30-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-31-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-31-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-32-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-32-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-33-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-33-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-34-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-34-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-35-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-35-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-36-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-36-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-37-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-37-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-38-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-38-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-39-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-39-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-40-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-40-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-41-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-41-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-42-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-42-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-43-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-43-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-44-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-44-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-45-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-45-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-46-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-46-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-47-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-47-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-48-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-48-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-49-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-49-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-50-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-50-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-51-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-51-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-52-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-52-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-53-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-53-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-54-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-54-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-55-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-55-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-56-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-56-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-57-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-57-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-58-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-58-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-59-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-59-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-60-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-60-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-61-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-61-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-62-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-62-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-63-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-63-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-64-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-64-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-65-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-65-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-66-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-66-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-67-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-67-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-68-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-68-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-69-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-69-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-70-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-70-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-71-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-71-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-72-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-72-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-73-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-73-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-74-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-74-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-75-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-75-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-76-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-76-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-77-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-77-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-78-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-78-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-79-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-79-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-80-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-80-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-81-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-81-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-82-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-82-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-83-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-83-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-84-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-84-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-85-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-85-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-86-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-86-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-87-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-87-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-88-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-88-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-89-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-89-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-90-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-90-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-91-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-91-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-92-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-92-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-93-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-93-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-94-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-94-00001.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-95-00000.npy
dclm,preprocessed/700B_olmo2_sample/dclm/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-95-00001.npy

# Wikipedia Data
wikipedia,preprocessed/700B_olmo2_sample/wiki/home/ec2-user/{TOKENIZER}/tokenizer.json_bos/part-0-00000.npy
