.dockerignore
.gitattributes
.gitignore
Dockerfile
LICENSE
README.md
approvaltests_config.json
cli
config.yml
pyproject.toml
.github/dependabot.yml
.github/release.yml
.github/workflows/ci.yml
.idea/.gitignore
.idea/.name
.idea/dataSources.xml
.idea/jsLibraryMappings.xml
.idea/misc.xml
.idea/modules.xml
.idea/php.xml
.idea/vcs.xml
.idea/codeStyles/codeStyleConfig.xml
.idea/inspectionProfiles/Project_Default.xml
archive_query_log/__init__.py
archive_query_log/__main__.py
archive_query_log/cdx.py
archive_query_log/config.py
archive_query_log/namespaces.py
archive_query_log/orm.py
archive_query_log.egg-info/PKG-INFO
archive_query_log.egg-info/SOURCES.txt
archive_query_log.egg-info/dependency_links.txt
archive_query_log.egg-info/entry_points.txt
archive_query_log.egg-info/requires.txt
archive_query_log.egg-info/top_level.txt
archive_query_log/cli/__init__.py
archive_query_log/cli/archives.py
archive_query_log/cli/captures.py
archive_query_log/cli/monitoring.py
archive_query_log/cli/providers.py
archive_query_log/cli/sources.py
archive_query_log/cli/util.py
archive_query_log/legacy/__init__.py
archive_query_log/legacy/config.py
archive_query_log/legacy/conftest.py
archive_query_log/legacy/generate_review_sample.py
archive_query_log/legacy/service_stats.py
archive_query_log/legacy/test_fastwarc.py
archive_query_log/legacy/cli/__init__.py
archive_query_log/legacy/cli/alexa.py
archive_query_log/legacy/cli/corpus.py
archive_query_log/legacy/cli/external.py
archive_query_log/legacy/cli/index.py
archive_query_log/legacy/cli/main.py
archive_query_log/legacy/cli/make.py
archive_query_log/legacy/cli/util.py
archive_query_log/legacy/download/__init__.py
archive_query_log/legacy/download/iterable.py
archive_query_log/legacy/download/raw.py
archive_query_log/legacy/download/warc.py
archive_query_log/legacy/index/__init__.py
archive_query_log/legacy/model/__init__.py
archive_query_log/legacy/model/highlight.py
archive_query_log/legacy/model/parse.py
archive_query_log/legacy/queries/__init__.py
archive_query_log/legacy/queries/iterable.py
archive_query_log/legacy/queries/parse.py
archive_query_log/legacy/results/__init__.py
archive_query_log/legacy/results/chatnoir.py
archive_query_log/legacy/results/parse.py
archive_query_log/legacy/results/test/__init__.py
archive_query_log/legacy/results/test/generate_tests.py
archive_query_log/legacy/results/test/test_360_serp_parsing.py
archive_query_log/legacy/results/test/test_aliexpress_serp_parsing.py
archive_query_log/legacy/results/test/test_amazon_serp_parsing.py
archive_query_log/legacy/results/test/test_baidu_serp_parsing.py
archive_query_log/legacy/results/test/test_bing_serp_parsing.py
archive_query_log/legacy/results/test/test_bongacams_serp_parsing.py
archive_query_log/legacy/results/test/test_canva_serp_parsing.py
archive_query_log/legacy/results/test/test_cnn_serp_parsing.py
archive_query_log/legacy/results/test/test_csdn_serp_parsing.py
archive_query_log/legacy/results/test/test_ebay_serp_parsing.py
archive_query_log/legacy/results/test/test_espn_serp_parsing.py
archive_query_log/legacy/results/test/test_etsy_serp_parsing.py
archive_query_log/legacy/results/test/test_facebook_serp_parsing.py
archive_query_log/legacy/results/test/test_github_serp_parsing.py
archive_query_log/legacy/results/test/test_google_serp_parsing.py
archive_query_log/legacy/results/test/test_imdb_serp_parsing.py
archive_query_log/legacy/results/test/test_imgur_serp_parsing.py
archive_query_log/legacy/results/test/test_indeed_serp_parsing.py
archive_query_log/legacy/results/test/test_jd_serp_parsing.py
archive_query_log/legacy/results/test/test_linkedin_serp_parsing.py
archive_query_log/legacy/results/test/test_manual_facebook_serp_parsing.py
archive_query_log/legacy/results/test/test_manual_google_serp_parsing.py
archive_query_log/legacy/results/test/test_manual_youtube_serp_parsing.py
archive_query_log/legacy/results/test/test_naver_serp_parsing.py
archive_query_log/legacy/results/test/test_pornhub_serp_parsing.py
archive_query_log/legacy/results/test/test_qq_serp_parsing.py
archive_query_log/legacy/results/test/test_reddit_serp_parsing.py
archive_query_log/legacy/results/test/test_roblox_serp_parsing.py
archive_query_log/legacy/results/test/test_sogou_serp_parsing.py
archive_query_log/legacy/results/test/test_stackoverflow_serp_parsing.py
archive_query_log/legacy/results/test/test_tribunnews_serp_parsing.py
archive_query_log/legacy/results/test/test_twitch_serp_parsing.py
archive_query_log/legacy/results/test/test_twitter_serp_parsing.py
archive_query_log/legacy/results/test/test_utils.py
archive_query_log/legacy/results/test/test_vk_serp_parsing.py
archive_query_log/legacy/results/test/test_weibo_serp_parsing.py
archive_query_log/legacy/results/test/test_wikimedia_serp_parsing.py
archive_query_log/legacy/results/test/test_xvideos_serp_parsing.py
archive_query_log/legacy/results/test/test_yahoo_serp_parsing.py
archive_query_log/legacy/results/test/test_yandex_serp_parsing.py
archive_query_log/legacy/results/test/test_youtube_serp_parsing.py
archive_query_log/legacy/serps/__init__.py
archive_query_log/legacy/serps/iterable.py
archive_query_log/legacy/services/__init__.py
archive_query_log/legacy/services/aggregate_services.py
archive_query_log/legacy/services/alexa.py
archive_query_log/legacy/services/search_forms.py
archive_query_log/legacy/services/test_services.py
archive_query_log/legacy/services/update_yaml.py
archive_query_log/legacy/urls/__init__.py
archive_query_log/legacy/urls/fetch.py
archive_query_log/legacy/urls/iterable.py
archive_query_log/legacy/util/__init__.py
archive_query_log/legacy/util/archive_http.py
archive_query_log/legacy/util/html.py
archive_query_log/legacy/util/http_session.py
archive_query_log/legacy/util/iterable.py
archive_query_log/legacy/util/serialization.py
archive_query_log/legacy/util/text.py
archive_query_log/legacy/util/urls.py
archive_query_log/templates/home.html
archive_query_log/utils/__init__.py
archive_query_log/utils/es.py
archive_query_log/utils/time.py
archive_query_log/web/__init__.py
archive_query_log/web/home.py
data/.gitignore
data/selected-services.yaml
data/examples/results.jsonl
data/examples/serps.jsonl
data/manual-annotations/archived-raw-serps/expected/.gitignore
data/manual-annotations/archived-raw-serps/expected/360-21tian-jian-fei-fa-1653652529.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-an-jian-diao-cha-bi-lu-1576497545.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-chen-teng-tan-han-han-mei-ren-jie-ji-1577810108.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-duo-jia-ju-chang-qu-xiao-yan-chu-1579663841.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-hui-ji-pei-xun-1579668897.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-jin-ji-che-hui-1576637402.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-ju-min-jia-zhong-fei-jin-bian-fu-1585788499.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-lu-xing-she-tuan-dui-you-zan-ting-1580014331.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-mao-bu-yi-xiao-chou-chao-xi-1576791478.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-mei-guo-qian-zheng-1579954288.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-niu-jie-5hao-1576819311.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-shi-jie-zui-ai-de-ren-qu-shi-1579483438.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-si-chuan-fa-sheng-6-8ji-di-zhen-1662384982.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-site-chaxun-biz-1550145271.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-site-chaxun-biz-1550161171.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-site-chaxun-biz-1551742698.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-site-eng-kaz-enacademic-com-1581787441.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-tao-bao-ru-he-che-hui-ping-jie-1576604187.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-wu-xiu-bo-yang-kun-ju-hui-hai-ge-1577858222.approved.txt
data/manual-annotations/archived-raw-serps/expected/360-zhang-bo-zhi-bei-gou-yao-shang-1661090641.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-barefoot-accessories-1508385234.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-children-school-bag-1388612710.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-jewelry-sets-1390214039.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-kids-wear-1293514157.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-q5-phone-1374387535.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-sterling-silver-gemstone-1384701071.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-sterling-silver-pandora-charms-1457090417.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-transmission-shaft-1435605057.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-waterproof-winter-boots-women-1500418630.approved.txt
data/manual-annotations/archived-raw-serps/expected/aliexpress-women-messenger-bags-1397198302.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-deng-shi-jia-meng-www-baidu-com-aaaa-oq1-2018nian-8yue-19ri-19shi-32fen-18miao-1564357198.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-dnd-flash-mosquito-1633240377.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-eksa-1662208504.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-gaming-monitor-1639065822.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-guang-huaiba-pawameta-1632443655.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-gym-mat-tile-1634791656.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-joseph-menn-1605789427.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-lovense-1618431049.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-monitor-1625069899.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-notebook-dell-1613232661.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-omega-labyrinth-z-1616510370.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-pillow-fsa-1615964514.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-qian-lian-mo-hua-orizinarusaundotoratsuku-1657685343.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-solefit-1663015835.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-tazas-te-transparente-1613035337.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-the-montells-you-can-t-make-me-1617199271.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-the-risks-of-prescription-drugs-1657820625.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-under-armour-socks-1553107494.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-yangumagazin-yanmagasado-1625727694.approved.txt
data/manual-annotations/archived-raw-serps/expected/amazon-zao-dian-jia-meng-pin-pai-www-baidu-com-aaaa-zv9-2018nian-8yue-21ri-17shi-49fen-4miao-1564430721.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-e-eeeae-1522165852.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-e-eeeae-1544443888.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-e-eeeae-1547510808.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-e-eeeae-1554431691.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-e-eeeae-1558963051.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-e-eeeae-1567878230.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-e-eeeae-1569971117.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-eru-1525597474.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-lian-xi-qu-dian-nao-pei-xun-1643390077.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-lian-yun-gang-qi-yang-yue-zi-hui-suo-zhong-xin-jing-zhun-ke-hu-ying-xiao-key668-cn-1537259978.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-long-feng-qu-qi-quan-qi-huo-pei-xun-1642853767.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-long-shi-liang-1639014949.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1549835769.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1553838201.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1558979495.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1564025418.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-mao-ning-kui-mao-dan-dan-mang-long-lou-mang-lu-shikato-1537858258.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-sexinsex-1-2-1213858525.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-shang-hai-11xuan-5ding-dan-wei-xin-weiwei776699-1545685930.approved.txt
data/manual-annotations/archived-raw-serps/expected/baidu-zhong-guo-lian-tong-smsc-1372160086.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-florence-caillon-bandcamp-1647897680.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-great-blue-heron-sound-1660552923.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-intel-i7-chip-1656625238.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-kumolife-1388896361.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-mesozoic-wikipedia-1643229373.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-regional-asia-taiwan-localities-taichung-city-1580952978.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-rpg-title-screen-1656104004.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-uscis-forms-400-1486690408.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-win10-iso-xia-zai-1580349240.approved.txt
data/manual-annotations/archived-raw-serps/expected/bing-wonder-kids-beginning-1652132287.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-asslicking-1623701105.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-camshow-1653633300.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-cock-sucking-1617355851.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-facial-1578106424.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-fucking-1577494848.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-hd-plus-1576443879.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-massage-1604010948.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-stripping-1623091287.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-stripping-1648087966.approved.txt
data/manual-annotations/archived-raw-serps/expected/bongacams-teasing-1635992326.approved.txt
data/manual-annotations/archived-raw-serps/expected/canva-instagram-reels-video-1607594697.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-achievable-possibility-1642416655.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-apple-watch-2016-1662575890.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-biteukoinsaryeomyeon-www-99m-kr-bei-biteukoinsaryelie-biteukoinsaeobbbiteukoinsaeobjadeungrog-biteuk-1647153716.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-biteukoinsaryeomyeon-www-99m-kr-bei-biteukoinsaryelie-biteukoinsaeobbbiteukoinsaeobjadeungrog-biteukoinsayongbangbeob8reduplicate-1647153716.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-dxa-1634498874.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-kindly-check-1642508434.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-march-4-1633908556.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-north-dakota-coronavirus-1632793271.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-skilled-1643388398.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-trigger-1638271567.approved.txt
data/manual-annotations/archived-raw-serps/expected/cnn-wright-1616229078.approved.txt
data/manual-annotations/archived-raw-serps/expected/csdn-android-studio-1660487187.approved.txt
data/manual-annotations/archived-raw-serps/expected/csdn-fifo-1663204137.approved.txt
data/manual-annotations/archived-raw-serps/expected/csdn-rows-1665572662.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-1-4-1566151598.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-hu-bei-kuai-san-zhong-jiang-zhu-shou-jia-wei-vxin-weiwei776699-1548361753.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-imaginext-batman-motorcycle-1597143554.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-liu-he-cai-2o2526-wei-xin-weiwei776699-1562672411.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-liu-he-cai-gua-pai-wan-zheng-ban-wei-vxin-weiwei776699-1565563054.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-se-xing-mei-jia-wei-vxin-dun35358-1544323503.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-shan-dong-11xuan-5-360cai-piao-wei-xin-weiwei776699-1538125942.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-smartphones-1334931269.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-srch-str-1641320481.approved.txt
data/manual-annotations/archived-raw-serps/expected/ebay-victoria-s-secret-1559541146.approved.txt
data/manual-annotations/archived-raw-serps/expected/espn-ball-state-1615730301.approved.txt
data/manual-annotations/archived-raw-serps/expected/espn-ball-state-1619440827.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-andrew-kim-1566153959.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-embroidery-kit-1375805089.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-hard-plastic-1333819223.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-invitation-1367234841.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-pacifier-1336437253.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-pendants-1667648711.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-small-animal-1378528595.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-storage-and-organization-1632881953.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-sundress-1374472455.approved.txt
data/manual-annotations/archived-raw-serps/expected/etsy-yan-dong-kuai-le-shi-fen-shou-ji-ban-jia-wei-xin-xw639-2018-1537911471.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-1-million-cards-1614844146.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-3dnoam-chomsky-1611582471.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-3dtaylor-company-1611665585.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-5-orszagos-1618089409.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-7-1620274873.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-abbvie-1623327630.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-aj-duca-1388091562.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-alda-lesbiennes-refugiees-1615284371.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-anthony-1376743219.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-bernieorbust-1467812085.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-blog-post-319-je31-1567459151.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-blog-post-334-bootload-1567494170.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-cruzcrew-1459272010.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-deanna-sanchez-1629215596.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-greet-1623235952.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-https-peelarchivesblog-com-about-peel-1599241783.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-ineligible-1466870871.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-jam-of-the-day-1410919861.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-mens-health-survival-of-the-fittest-1619473718.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-mr-robot-1469187052.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-rosy-20gupta-1494524363.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-social-plugins-boutons-jaime-envoyer-partager-et-citations-js-exec-je31-1567485463.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-solcellespecialisten-1389488036.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-tag-someone-who-needs-this-1587554575.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-trumptrain-1461904486.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-vanilla-1481832838.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-victoria-pynchon-1294673180.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-virpi-soikkeli-1623257178.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-wisconsin-1463064570.approved.txt
data/manual-annotations/archived-raw-serps/expected/facebook-www-9xcb-biz-webex-setup-was-unsuccessful-error-23-1404412853.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-jie-shi-de-jing-mi-gong-ye-you-xian-gong-si-1597737683.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-licensing-gethhwid-1662581846.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-mastodon-1657854340.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-org-zettlr-1591675518.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-subrock-1653099784.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-topic-bootswatch-org-sslcom-1552777180.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-topic-deprecated-org-bandwidth-fork-true-1634361552.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-topic-docker-stack-org-issuu-1549097482.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-topic-id-org-codercom-1553274306.approved.txt
data/manual-annotations/archived-raw-serps/expected/github-topic-web-components-org-github-fork-true-1650012538.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-243-1628713922.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-9-11-revisited-1618106149.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-agust-1614235489.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-atalanta-bergamasca-calcio-wikipedia-1643703998.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-attack-on-titan-season-3-part-2-1556408967.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-bill-easley-sophisticated-prelude-1626498182.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-chi-zhen-nzhong-rong-keteru-1608917828.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-cortisol-test-1623205666.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-coxsackie-virus-1617107799.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-dead-cock-mortuary-1614203203.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-does-steve-has-a-beard-1601705030.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-dove-soap-1617107771.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-finance-1540158323.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-flop-thumbs-up-1568839345.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-hawaii-part-ii-lyrics-1633318830.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-hola-games-1552338270.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-homemade-l-1617107839.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-lenin-1614385592.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-ocolc-826746-1240020639.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-orlando-bedoya-site-wikipedia-org-site-wikimedia-org-1629326434.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-scholar-a-tumeo-m-branca-l-camerini-a-dual-priority-realtime-multiprocessor-system-on-fpga-for-automotive-ap-1614181186.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-scholar-muhammed-rashid-1656890873.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-scholar-w-fan-j-li-s-ma-n-tang-and-w-yu-april-2012-towards-certain-fixes-with-editing-rules-and-master-data-1614165399.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-susan-boyle-make-me-a-channel-of-your-peace-1607791072.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-taikoo-hui-mandarin-oriental-hotel-guangzhou-1652086766.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-why-is-one-foot-slightly-larger-than-the-other-one-1605140430.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-win10-iso-xia-zai-1577717811.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-www-boston-hotels-cheap-net-1062123561.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-www-exactresult-com-1062373767.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-www-wallstquotes-com-1062140693.approved.txt
data/manual-annotations/archived-raw-serps/expected/google-zombie-apocalypse-1565114134.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-0609265-s-nm-1329020836.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-angelina-jolie-1452700725.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-dogville-1187303706.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-gundula-rapsch-1628094679.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-hunger-games-1518585690.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-marcela-gomez-montoya-1614546944.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-murder-world-1268209692.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-pulse-1283006912.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-sam-claflin-1472223834.approved.txt
data/manual-annotations/archived-raw-serps/expected/imdb-the-expanse-1521743964.approved.txt
data/manual-annotations/archived-raw-serps/expected/imgur-search-term-string-1547858079.approved.txt
data/manual-annotations/archived-raw-serps/expected/imgur-search-term-string-1565643838.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-60000-1450529560.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-concept-development-integration-1647187061.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-customer-service-1447983624.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-hca-corporate-1334425152.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-higher-education-freelance-network-1585036169.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-international-english-prep-academy-iepa-1586237411.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-jvm-lending-1586272513.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-origins-macy-s-1353165507.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-pepsico-1487893385.approved.txt
data/manual-annotations/archived-raw-serps/expected/indeed-steiner-business-solutions-1577184429.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-1368578723.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-ch-1382528052.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-che-zai-cd-1507664971.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-dao-tian-1442736754.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-guan-yin-liao-1501752634.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-iu-vdun35358z-1537907650.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-ji-guang-da-yin-1516054404.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-mo-yan-1516163665.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-qing-shang-1510505248.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-sha-tan-ku-nan-1446537277.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-shlrwanh-uq-2179706936-1473350033.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-song-yan-1434320372.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-tai-kong-zhen-1508948206.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-tuan-dui-guan-li-1429813603.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-weiwei776699e-1537978473.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-xian-xing-dai-shu-1497236213.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-yang-hong-wei-1429640037.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-you-xi-1511265243.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-yu-diao-jiu-ning-meng-1601078153.approved.txt
data/manual-annotations/archived-raw-serps/expected/jd-yxingaiujliu-xwcp198-1537926187.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-cryptocurrencies-1556702372.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-g-d-goenka-international-school-surat-wisdom-valley-campus-nr-anuvrat-dwar-new-city-light-road-rcc-c-1568400864.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-g-d-goenka-international-school-surat-wisdom-valley-campus-nr-anuvrat-dwar-new-city-light-road-rcc-canal-road-bar-surat-gujrat-1568400864.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-james-margolin-fbi-1571382052.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-parent-information-center-of-nj-https-www-linkedin-com-feed-1537815970.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-parent-information-center-of-nj-https-www-linkedin-com-search-results-all-keywords-parent-informatio-1537815969.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-parent-information-center-of-nj-https-www-linkedin-com-search-results-all-keywords-parent-information-center-of-nj-1537815969.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-test-1563279735.approved.txt
data/manual-annotations/archived-raw-serps/expected/linkedin-vizthink-1229875736.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-10559239-974530320.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-11548566-973003263.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-dpsxmfl-1632292908.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-geobugseon-1439284748.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-heolkeu-dari-gajin-namja-1397449417.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-junggangosagongbu-1627424756.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-monteria-montheria-bunyupoteu-jeongitipoteu-jeongijujeonja-bunyupoteugi-1200ml-1200mlpingkeu-hugi-1640713881.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-sejongmunhwahoegwan-daegeugjang-1652993423.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-t-1047137339.approved.txt
data/manual-annotations/archived-raw-serps/expected/naver-z-974687640.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-12-1565436858.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-brazilian-carnival-1378864891.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-cfnm2-1271707129.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-full-porno-films-1633148146.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-kardashian-1232246712.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-mini-skirt-1257166127.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-org-1219567054.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-roxy-reynolds-1222496976.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-stripping-1627962626.approved.txt
data/manual-annotations/archived-raw-serps/expected/pornhub-teen-facial-1237526884.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-danil-kozlovsky-1360453772.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-erin-1359911760.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-feng-mi-lian-1380895166.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-hua-ze-xiang-cai-1408309927.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-james-boshier-1449178049.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-ji-lin-yan-ji-1360789033.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-ji-zhou-dao-ttglao-hu-ji-ping-tai-guan-wang-x-fa33n-comx-1488991663.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-jie-ke-luo-de-wei-er-1445892389.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-jin-cheng-wu-1319745059.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-jin-xiu-long-1320298924.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-lin-xi-tong-1507483207.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-niamh-cusack-1319751306.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-shen-shan-zheng-er-lang-1405119212.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-statue-1536122094.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-tag-encode-1341871817.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-turn-that-finger-around-1324266860.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-xing-xing-di-qiu-2-1582812539.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-zhang-dong-jian-1446133153.approved.txt
data/manual-annotations/archived-raw-serps/expected/qq-zhong-nian-wei-ji-1408289827.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-3ds-1364668924.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-b7-1650853917.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-flair-already-posted-https-redd-it-xamfzl-1667030236.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-german-nebelmittelwurfanlage-1664557458.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-how-can-i-access-nature-com-articles-1666041985.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-jailbait-1376244913.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-supermoon-1403887175.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-teleperformance-1260472045.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-touch-meme-1663170731.approved.txt
data/manual-annotations/archived-raw-serps/expected/reddit-typetest-10fastfingers-1663797626.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-build-to-survive-black-people-1663785067.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-chernobyl-rp-1666555966.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-cut-1633682743.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-shorts-1357668619.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-soviet-union-1657601847.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-survival-1656759229.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-survive-the-killer-1640549700.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-welcome-to-the-town-of-robloxia-uncopylocked-1660007534.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-xo-so-88-gg8-run-1659417607.approved.txt
data/manual-annotations/archived-raw-serps/expected/roblox-znation8000-1650350926.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-cesuk-163163163-cn-1493482889.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-eed-1-4a1-a-1332923188.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-j-1341637724.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-ju-min-shen-fen-zheng-hao-ma-he-xing-ming-1508756794.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-l-1346233371.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-lofke-yi-kua-shi-chang-mai-mai-1333092705.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-ti-gong-jin-kou-qi-qiang-1647324224.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-tu-guan-xin-gai-kuan-1332227339.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-xing-ai-ji-qiao-1576522689.approved.txt
data/manual-annotations/archived-raw-serps/expected/sogou-yuan-dai-ma-xie-lu-1578201651.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-fonts-swing-jtextpane-1412916125.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-numpy-einsum-1516773912.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-objective-c-1354546520.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-remote-execution-1645839151.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-ruby-1-9-3-heroku-1398351399.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-ruby-on-rails-plugins-1613971498.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-scala-1355718931.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-text-mining-1522322354.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-vue-js-php-1647710017.approved.txt
data/manual-annotations/archived-raw-serps/expected/stackoverflow-xampp-java-1547581001.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-linkid-js-1533168227.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-peringatan-dini-bmkg-kamis-18-februari-2021-1632679908.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-pilgub-dki-jakarta-2017-1491952670.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-polsek-tallo-1663047668.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-ricky-natapradja-1663049121.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-search-term-string-1607106977.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-search-term-string-1607120098.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-search-term-string-1607130238.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-search-term-string-1607224881.approved.txt
data/manual-annotations/archived-raw-serps/expected/tribunnews-search-term-string-1607225592.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitch-a-plague-tale-innocence-1638358324.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitch-central-fluminense-1630540704.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitch-cpentagon-1640060311.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitch-juliversal-1629750531.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitch-rubberboy2001-1661349876.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitch-simplevar-1662753027.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitch-xxlillythefallenangelneko-1638019769.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-campaigns-1481768285.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-clubs-lang-sr-1591413117.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-ecotourism-1557315321.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-freemariabutina-1555124003.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-freemariabutina-1562498575.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-freemariabutina-1563062689.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-http-shop-pre-com-corona02-p-409033-1648579256.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-lauramajor-1652839406.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-rabble-ca-lang-ar-1442999332.approved.txt
data/manual-annotations/archived-raw-serps/expected/twitter-rabble-ca-lang-fr-lang-id-1443046297.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-000-space-marine-1363614912.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-grot-1353834377.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-gta-1389323282.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-imperatory-illiuzii-1387125239.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-loco-roco-1377325114.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-muzyka-1372304839.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-rabotaiu-na-sebia-1361533546.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-radioelektronnye-sistemy-1361402773.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-sportivnye-mototsikly-1387080107.approved.txt
data/manual-annotations/archived-raw-serps/expected/vk-technology-1383988940.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-bu-xiang-jiao-hun-li-de-fen-zi-qian-refer-focus-lx-stopic-box-1527256388.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-fu-cai-3dshu-ju-jia-wei-xin-xw639-2018-1555261848.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-hun-li-li-jie-refer-stopic-box-1505917853.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-ji-huo-ma-b-1-page-2-1433297692.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-kan-bu-jian-de-yan-pi-1409920649.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-li-wei-yi-refer-user-weibo-1517733046.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-meng-lin-dexiao-wu-1437753277.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-quan-ye-cha-page-13-1517932436.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-sheng-fu-lang-xi-si-1603546157.approved.txt
data/manual-annotations/archived-raw-serps/expected/weibo-xiao-xiao-bin-refer-stopic-box-1518290095.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-ao-xian-yu-le-du-bo-ping-tai-1430758639.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-figures-in-theatrical-costumes-claude-gillot-1673-1722-class-photo-description-french-painter-drawer-1632572254.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-group-portrait-on-doorstep-collins-tudor-washington-1898-1970-photographer-75426-object-number-haswb-1629837781.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-la-dian-zi-ji-1517046182.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-mou-lin-han-site-pku-edu-cn-1576868395.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-nito-1655733503.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-oxygen-1596274448.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-prob-1543207125.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-recaptchalogo-svg-1509131182.approved.txt
data/manual-annotations/archived-raw-serps/expected/wikimedia-zao-can-bao-zi-jia-meng-www-baidu-com-aaaa-4ws-2018nian-8yue-4ri-21shi-19fen-39miao-1537939782.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-casey-cavert-go-to-www-lovesbo-ru-1642784433.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-claire-dames-1325320932.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-drunk-1325431509.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-escola-sao-luis-1657929903.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-evander-marius-go-to-www-lovesbo-ru-1642716634.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-free-hardcore-porn-videos-japanese-video-1428541087.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-gay-peeing-porn-go-to-www-lovesbo-ru-1643357466.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-gay-whitezilla-go-to-www-lovesbo-ru-1643416941.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-robber-1248193170.approved.txt
data/manual-annotations/archived-raw-serps/expected/xvideos-solo-1341894522.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-ashland-oregon-1015423512.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-bc-gov-971492707.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-castle-heights-angeles-1611631735.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-civil-code-1658358754.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-dating-sites-1647656764.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-diver-lg-u8180-1620023310.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-dunwells-army-of-friends-1617218301.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-futurama-1314932107.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-jello-gelatin-971145484.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-john-mccain-956454509.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-lawyer-career-information-970996615.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-learn-to-focus-1619373855.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-lm-1376048795.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-metallurgi-1042532635.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-molly-shannon-1640079691.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-region-de-coquimbo-970960857.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-sam-bush-1016625678.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-telekwiaciarnia-pl-1436841282.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-world-greek-kata-periokhe-boreia-amerike-kanadas-1647085990.approved.txt
data/manual-annotations/archived-raw-serps/expected/yahoo-world-hebrew-hbrh-shlvm-1489427128.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-danses-tv-periodic-1534841786.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-niusha-1515577404.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-speed-force-1535895408.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-speed-force-1535955246.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-speed-force-1535970436.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-speed-force-1535973684.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-speed-force-1536042339.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-speed-force-1536100368.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-spirited-away-animated-film-2001-1524478207.approved.txt
data/manual-annotations/archived-raw-serps/expected/yandex-virat-swaroop-1523992633.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-ampatuanmassacre-1583309425.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-cardistry-asmr-1577113546.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-chaoz-time-1652155691.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-chto-budet-esli-sobrat-vse-rezinki-v-bully-1599555287.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-hp-probook-640-g1-razborka-1579996098.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-kak-sdelat-vzryv-v-cinema-4d-1578732009.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-kak-vybit-dushu-moba-1599057231.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-kutyaplya-1561171748.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-lataji-1563086980.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-literatura-6-klass-biografiia-pushkina-1595705363.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-ne-prosto-bekkhen-reaktsiia-1582514079.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-pora-tiulpanov-aktery-1574991226.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-prokhozhdenie-igry-madagaskar-2-chast-4-1562167916.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-pudding-1563068696.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-q2-2017-arizona-west-regional-1547871680.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-razbor-shchetki-pylesosa-samsung-1584184489.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-robloks-mip-siti-2019-1583715307.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-shang-yue-xian-rupu-1582390054.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-uzbekskaia-svadba-v-shymkente-1574145605.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-vasilii-emelianenko-tefteli-1578285143.approved.txt
data/manual-annotations/archived-raw-serps/expected/youtube-zadnii-most-moskvich-412-ustroistvo-1584150216.approved.txt
data/manual-annotations/archived-raw-serps/warcs/360-21tian-jian-fei-fa-1653652529.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-an-jian-diao-cha-bi-lu-1576497545.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-chen-teng-tan-han-han-mei-ren-jie-ji-1577810108.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-duo-jia-ju-chang-qu-xiao-yan-chu-1579663841.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-hui-ji-pei-xun-1579668897.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-jin-ji-che-hui-1576637402.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-ju-min-jia-zhong-fei-jin-bian-fu-1585788499.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-lu-xing-she-tuan-dui-you-zan-ting-1580014331.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-mao-bu-yi-xiao-chou-chao-xi-1576791478.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-mei-guo-qian-zheng-1579954288.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-niu-jie-5hao-1576819311.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-shi-jie-zui-ai-de-ren-qu-shi-1579483438.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-si-chuan-fa-sheng-6-8ji-di-zhen-1662384982.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-site-chaxun-biz-1550145271.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-site-chaxun-biz-1550161171.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-site-chaxun-biz-1551742698.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-site-eng-kaz-enacademic-com-1581787441.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-tao-bao-ru-he-che-hui-ping-jie-1576604187.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-wu-xiu-bo-yang-kun-ju-hui-hai-ge-1577858222.warc.gz
data/manual-annotations/archived-raw-serps/warcs/360-zhang-bo-zhi-bei-gou-yao-shang-1661090641.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-barefoot-accessories-1508385234.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-children-school-bag-1388612710.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-jewelry-sets-1390214039.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-kids-wear-1293514157.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-q5-phone-1374387535.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-sterling-silver-gemstone-1384701071.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-sterling-silver-pandora-charms-1457090417.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-transmission-shaft-1435605057.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-waterproof-winter-boots-women-1500418630.warc.gz
data/manual-annotations/archived-raw-serps/warcs/aliexpress-women-messenger-bags-1397198302.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-deng-shi-jia-meng-www-baidu-com-aaaa-oq1-2018nian-8yue-19ri-19shi-32fen-18miao-1564357198.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-dnd-flash-mosquito-1633240377.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-eksa-1662208504.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-gaming-monitor-1639065822.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-guang-huaiba-pawameta-1632443655.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-gym-mat-tile-1634791656.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-joseph-menn-1605789427.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-lovense-1618431049.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-monitor-1625069899.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-notebook-dell-1613232661.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-omega-labyrinth-z-1616510370.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-pillow-fsa-1615964514.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-qian-lian-mo-hua-orizinarusaundotoratsuku-1657685343.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-solefit-1663015835.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-tazas-te-transparente-1613035337.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-the-montells-you-can-t-make-me-1617199271.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-the-risks-of-prescription-drugs-1657820625.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-under-armour-socks-1553107494.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-yangumagazin-yanmagasado-1625727694.warc.gz
data/manual-annotations/archived-raw-serps/warcs/amazon-zao-dian-jia-meng-pin-pai-www-baidu-com-aaaa-zv9-2018nian-8yue-21ri-17shi-49fen-4miao-1564430721.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-e-eeeae-1522165852.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-e-eeeae-1544443888.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-e-eeeae-1547510808.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-e-eeeae-1554431691.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-e-eeeae-1558963051.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-e-eeeae-1567878230.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-e-eeeae-1569971117.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-eru-1525597474.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-lian-xi-qu-dian-nao-pei-xun-1643390077.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-lian-yun-gang-qi-yang-yue-zi-hui-suo-zhong-xin-jing-zhun-ke-hu-ying-xiao-key668-cn-1537259978.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-long-feng-qu-qi-quan-qi-huo-pei-xun-1642853767.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-long-shi-liang-1639014949.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1549835769.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1553838201.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1558979495.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-mao-kui-lu-mao-cong-cong-mao-ning-kui-mao-dan-dan-mang-lu-1564025418.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-mao-ning-kui-mao-dan-dan-mang-long-lou-mang-lu-shikato-1537858258.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-sexinsex-1-2-1213858525.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-shang-hai-11xuan-5ding-dan-wei-xin-weiwei776699-1545685930.warc.gz
data/manual-annotations/archived-raw-serps/warcs/baidu-zhong-guo-lian-tong-smsc-1372160086.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-florence-caillon-bandcamp-1647897680.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-great-blue-heron-sound-1660552923.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-intel-i7-chip-1656625238.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-kumolife-1388896361.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-mesozoic-wikipedia-1643229373.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-regional-asia-taiwan-localities-taichung-city-1580952978.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-rpg-title-screen-1656104004.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-uscis-forms-400-1486690408.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-win10-iso-xia-zai-1580349240.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bing-wonder-kids-beginning-1652132287.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-asslicking-1623701105.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-camshow-1653633300.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-cock-sucking-1617355851.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-facial-1578106424.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-fucking-1577494848.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-hd-plus-1576443879.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-massage-1604010948.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-stripping-1623091287.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-stripping-1648087966.warc.gz
data/manual-annotations/archived-raw-serps/warcs/bongacams-teasing-1635992326.warc.gz
data/manual-annotations/archived-raw-serps/warcs/canva-instagram-reels-video-1607594697.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-achievable-possibility-1642416655.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-apple-watch-2016-1662575890.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-biteukoinsaryeomyeon-www-99m-kr-bei-biteukoinsaryelie-biteukoinsaeobbbiteukoinsaeobjadeungrog-biteukoinsayongbangbeob8reduplicate-1647153716.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-dxa-1634498874.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-kindly-check-1642508434.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-march-4-1633908556.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-north-dakota-coronavirus-1632793271.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-skilled-1643388398.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-trigger-1638271567.warc.gz
data/manual-annotations/archived-raw-serps/warcs/cnn-wright-1616229078.warc.gz
data/manual-annotations/archived-raw-serps/warcs/csdn-android-studio-1660487187.warc.gz
data/manual-annotations/archived-raw-serps/warcs/csdn-fifo-1663204137.warc.gz
data/manual-annotations/archived-raw-serps/warcs/csdn-rows-1665572662.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-1-4-1566151598.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-hu-bei-kuai-san-zhong-jiang-zhu-shou-jia-wei-vxin-weiwei776699-1548361753.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-imaginext-batman-motorcycle-1597143554.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-liu-he-cai-2o2526-wei-xin-weiwei776699-1562672411.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-liu-he-cai-gua-pai-wan-zheng-ban-wei-vxin-weiwei776699-1565563054.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-se-xing-mei-jia-wei-vxin-dun35358-1544323503.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-shan-dong-11xuan-5-360cai-piao-wei-xin-weiwei776699-1538125942.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-smartphones-1334931269.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-srch-str-1641320481.warc.gz
data/manual-annotations/archived-raw-serps/warcs/ebay-victoria-s-secret-1559541146.warc.gz
data/manual-annotations/archived-raw-serps/warcs/espn-ball-state-1615730301.warc.gz
data/manual-annotations/archived-raw-serps/warcs/espn-ball-state-1619440827.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-andrew-kim-1566153959.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-embroidery-kit-1375805089.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-hard-plastic-1333819223.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-invitation-1367234841.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-pacifier-1336437253.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-pendants-1667648711.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-small-animal-1378528595.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-storage-and-organization-1632881953.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-sundress-1374472455.warc.gz
data/manual-annotations/archived-raw-serps/warcs/etsy-yan-dong-kuai-le-shi-fen-shou-ji-ban-jia-wei-xin-xw639-2018-1537911471.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-alda-lesbiennes-refugiees-1615284371.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-bernieorbust-1467812085.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-blog-post-319-je31-1567459151.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-blog-post-334-bootload-1567494170.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-cruzcrew-1459272010.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-deanna-sanchez-1629215596.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-greet-1623235952.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-https-peelarchivesblog-com-about-peel-1599241783.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-ineligible-1466870871.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-mens-health-survival-of-the-fittest-1619473718.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-mr-robot-1469187052.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-rosy-20gupta-1494524363.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-social-plugins-boutons-jaime-envoyer-partager-et-citations-js-exec-je31-1567485463.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-solcellespecialisten-1389488036.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-tag-someone-who-needs-this-1587554575.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-trumptrain-1461904486.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-vanilla-1481832838.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-virpi-soikkeli-1623257178.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-wisconsin-1463064570.warc.gz
data/manual-annotations/archived-raw-serps/warcs/facebook-www-9xcb-biz-webex-setup-was-unsuccessful-error-23-1404412853.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-jie-shi-de-jing-mi-gong-ye-you-xian-gong-si-1597737683.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-licensing-gethhwid-1662581846.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-mastodon-1657854340.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-org-zettlr-1591675518.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-subrock-1653099784.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-topic-bootswatch-org-sslcom-1552777180.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-topic-deprecated-org-bandwidth-fork-true-1634361552.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-topic-docker-stack-org-issuu-1549097482.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-topic-id-org-codercom-1553274306.warc.gz
data/manual-annotations/archived-raw-serps/warcs/github-topic-web-components-org-github-fork-true-1650012538.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-243-1628713922.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-a-tumeo-m-branca-l-camerini-a-dual-priority-realtime-multiprocessor-system-on-fpga-for-automotive-ap-1614181186.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-atalanta-bergamasca-calcio-wikipedia-1643703998.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-attack-on-titan-season-3-part-2-1556408967.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-bill-easley-sophisticated-prelude-1626498182.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-chi-zhen-nzhong-rong-keteru-1608917828.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-does-steve-has-a-beard-1601705030.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-finance-1540158323.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-flop-thumbs-up-1568839345.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-hawaii-part-ii-lyrics-1633318830.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-hola-games-1552338270.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-muhammed-rashid-1656890873.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-ocolc-826746-1240020639.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-orlando-bedoya-site-wikipedia-org-site-wikimedia-org-1629326434.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-susan-boyle-make-me-a-channel-of-your-peace-1607791072.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-taikoo-hui-mandarin-oriental-hotel-guangzhou-1652086766.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-w-fan-j-li-s-ma-n-tang-and-w-yu-april-2012-towards-certain-fixes-with-editing-rules-and-master-data-the-vldb-journal-21-2-213-238-297-10-1007-s00778-011-0253-7-1614165399.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-why-is-one-foot-slightly-larger-than-the-other-one-1605140430.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-win10-iso-xia-zai-1577717811.warc.gz
data/manual-annotations/archived-raw-serps/warcs/google-zombie-apocalypse-1565114134.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-0609265-s-nm-1329020836.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-angelina-jolie-1452700725.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-dogville-1187303706.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-gundula-rapsch-1628094679.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-hunger-games-1518585690.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-marcela-gomez-montoya-1614546944.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-murder-world-1268209692.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-pulse-1283006912.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-sam-claflin-1472223834.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imdb-the-expanse-1521743964.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imgur-search-term-string-1547858079.warc.gz
data/manual-annotations/archived-raw-serps/warcs/imgur-search-term-string-1565643838.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-60000-1450529560.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-concept-development-integration-1647187061.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-customer-service-1447983624.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-hca-corporate-1334425152.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-higher-education-freelance-network-1585036169.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-international-english-prep-academy-iepa-1586237411.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-jvm-lending-1586272513.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-origins-macy-s-1353165507.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-pepsico-1487893385.warc.gz
data/manual-annotations/archived-raw-serps/warcs/indeed-steiner-business-solutions-1577184429.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-1368578723.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-ch-1382528052.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-che-zai-cd-1507664971.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-dao-tian-1442736754.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-guan-yin-liao-1501752634.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-iu-vdun35358z-1537907650.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-ji-guang-da-yin-1516054404.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-mo-yan-1516163665.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-qing-shang-1510505248.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-sha-tan-ku-nan-1446537277.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-shlrwanh-uq-2179706936-1473350033.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-song-yan-1434320372.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-tai-kong-zhen-1508948206.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-tuan-dui-guan-li-1429813603.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-weiwei776699e-1537978473.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-xian-xing-dai-shu-1497236213.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-yang-hong-wei-1429640037.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-you-xi-1511265243.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-yu-diao-jiu-ning-meng-1601078153.warc.gz
data/manual-annotations/archived-raw-serps/warcs/jd-yxingaiujliu-xwcp198-1537926187.warc.gz
data/manual-annotations/archived-raw-serps/warcs/linkedin-cryptocurrencies-1556702372.warc.gz
data/manual-annotations/archived-raw-serps/warcs/linkedin-g-d-goenka-international-school-surat-wisdom-valley-campus-nr-anuvrat-dwar-new-city-light-road-rcc-canal-road-bar-surat-gujrat-1568400864.warc.gz
data/manual-annotations/archived-raw-serps/warcs/linkedin-james-margolin-fbi-1571382052.warc.gz
data/manual-annotations/archived-raw-serps/warcs/linkedin-parent-information-center-of-nj-https-www-linkedin-com-feed-1537815970.warc.gz
data/manual-annotations/archived-raw-serps/warcs/linkedin-parent-information-center-of-nj-https-www-linkedin-com-search-results-all-keywords-parent-information-center-of-nj-1537815969.warc.gz
data/manual-annotations/archived-raw-serps/warcs/linkedin-test-1563279735.warc.gz
data/manual-annotations/archived-raw-serps/warcs/linkedin-vizthink-1229875736.warc.gz
data/manual-annotations/archived-raw-serps/warcs/manual-facebook.warc.gz
data/manual-annotations/archived-raw-serps/warcs/manual-google-1.warc.gz
data/manual-annotations/archived-raw-serps/warcs/manual-google-2.warc.gz
data/manual-annotations/archived-raw-serps/warcs/manual-youtube.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-10559239-974530320.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-11548566-973003263.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-dpsxmfl-1632292908.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-geobugseon-1439284748.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-heolkeu-dari-gajin-namja-1397449417.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-junggangosagongbu-1627424756.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-monteria-montheria-bunyupoteu-jeongitipoteu-jeongijujeonja-bunyupoteugi-1200ml-1200mlpingkeu-hugi-1640713881.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-sejongmunhwahoegwan-daegeugjang-1652993423.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-t-1047137339.warc.gz
data/manual-annotations/archived-raw-serps/warcs/naver-z-974687640.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-12-1565436858.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-brazilian-carnival-1378864891.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-cfnm2-1271707129.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-full-porno-films-1633148146.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-kardashian-1232246712.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-mini-skirt-1257166127.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-org-1219567054.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-roxy-reynolds-1222496976.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-stripping-1627962626.warc.gz
data/manual-annotations/archived-raw-serps/warcs/pornhub-teen-facial-1237526884.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-danil-kozlovsky-1360453772.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-erin-1359911760.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-feng-mi-lian-1380895166.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-hua-ze-xiang-cai-1408309927.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-james-boshier-1449178049.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-ji-lin-yan-ji-1360789033.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-ji-zhou-dao-ttglao-hu-ji-ping-tai-guan-wang-x-fa33n-comx-1488991663.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-jie-ke-luo-de-wei-er-1445892389.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-jin-cheng-wu-1319745059.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-jin-xiu-long-1320298924.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-lin-xi-tong-1507483207.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-niamh-cusack-1319751306.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-shen-shan-zheng-er-lang-1405119212.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-statue-1536122094.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-tag-encode-1341871817.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-turn-that-finger-around-1324266860.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-xing-xing-di-qiu-2-1582812539.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-zhang-dong-jian-1446133153.warc.gz
data/manual-annotations/archived-raw-serps/warcs/qq-zhong-nian-wei-ji-1408289827.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-3ds-1364668924.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-b7-1650853917.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-flair-already-posted-https-redd-it-xamfzl-1667030236.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-german-nebelmittelwurfanlage-1664557458.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-how-can-i-access-nature-com-articles-1666041985.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-jailbait-1376244913.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-supermoon-1403887175.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-teleperformance-1260472045.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-touch-meme-1663170731.warc.gz
data/manual-annotations/archived-raw-serps/warcs/reddit-typetest-10fastfingers-1663797626.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-build-to-survive-black-people-1663785067.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-chernobyl-rp-1666555966.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-cut-1633682743.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-shorts-1357668619.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-soviet-union-1657601847.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-survival-1656759229.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-survive-the-killer-1640549700.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-welcome-to-the-town-of-robloxia-uncopylocked-1660007534.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-xo-so-88-gg8-run-1659417607.warc.gz
data/manual-annotations/archived-raw-serps/warcs/roblox-znation8000-1650350926.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-cesuk-163163163-cn-1493482889.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-eed-1-4a1-a-1332923188.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-j-1341637724.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-ju-min-shen-fen-zheng-hao-ma-he-xing-ming-1508756794.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-l-1346233371.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-lofke-yi-kua-shi-chang-mai-mai-1333092705.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-ti-gong-jin-kou-qi-qiang-1647324224.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-tu-guan-xin-gai-kuan-1332227339.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-xing-ai-ji-qiao-1576522689.warc.gz
data/manual-annotations/archived-raw-serps/warcs/sogou-yuan-dai-ma-xie-lu-1578201651.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-fonts-swing-jtextpane-1412916125.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-numpy-einsum-1516773912.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-objective-c-1354546520.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-remote-execution-1645839151.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-ruby-1-9-3-heroku-1398351399.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-ruby-on-rails-plugins-1613971498.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-scala-1355718931.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-text-mining-1522322354.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-vue-js-php-1647710017.warc.gz
data/manual-annotations/archived-raw-serps/warcs/stackoverflow-xampp-java-1547581001.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-linkid-js-1533168227.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-peringatan-dini-bmkg-kamis-18-februari-2021-1632679908.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-pilgub-dki-jakarta-2017-1491952670.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-polsek-tallo-1663047668.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-ricky-natapradja-1663049121.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-search-term-string-1607106977.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-search-term-string-1607120098.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-search-term-string-1607130238.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-search-term-string-1607224881.warc.gz
data/manual-annotations/archived-raw-serps/warcs/tribunnews-search-term-string-1607225592.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitch-a-plague-tale-innocence-1638358324.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitch-central-fluminense-1630540704.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitch-cpentagon-1640060311.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitch-juliversal-1629750531.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitch-rubberboy2001-1661349876.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitch-simplevar-1662753027.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitch-xxlillythefallenangelneko-1638019769.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-campaigns-1481768285.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-clubs-lang-sr-1591413117.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-ecotourism-1557315321.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-freemariabutina-1555124003.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-freemariabutina-1562498575.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-freemariabutina-1563062689.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-http-shop-pre-com-corona02-p-409033-1648579256.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-lauramajor-1652839406.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-rabble-ca-lang-ar-1442999332.warc.gz
data/manual-annotations/archived-raw-serps/warcs/twitter-rabble-ca-lang-fr-lang-id-1443046297.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-000-space-marine-1363614912.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-grot-1353834377.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-gta-1389323282.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-imperatory-illiuzii-1387125239.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-loco-roco-1377325114.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-muzyka-1372304839.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-rabotaiu-na-sebia-1361533546.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-radioelektronnye-sistemy-1361402773.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-sportivnye-mototsikly-1387080107.warc.gz
data/manual-annotations/archived-raw-serps/warcs/vk-technology-1383988940.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-bu-xiang-jiao-hun-li-de-fen-zi-qian-refer-focus-lx-stopic-box-1527256388.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-fu-cai-3dshu-ju-jia-wei-xin-xw639-2018-1555261848.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-hun-li-li-jie-refer-stopic-box-1505917853.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-ji-huo-ma-b-1-page-2-1433297692.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-kan-bu-jian-de-yan-pi-1409920649.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-li-wei-yi-refer-user-weibo-1517733046.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-meng-lin-dexiao-wu-1437753277.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-quan-ye-cha-page-13-1517932436.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-sheng-fu-lang-xi-si-1603546157.warc.gz
data/manual-annotations/archived-raw-serps/warcs/weibo-xiao-xiao-bin-refer-stopic-box-1518290095.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-ao-xian-yu-le-du-bo-ping-tai-1430758639.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-figures-in-theatrical-costumes-claude-gillot-1673-1722-class-photo-description-french-painter-drawer-1632572254.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-group-portrait-on-doorstep-collins-tudor-washington-1898-1970-photographer-75426-object-number-haswb-1629837781.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-la-dian-zi-ji-1517046182.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-mou-lin-han-site-pku-edu-cn-1576868395.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-nito-1655733503.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-oxygen-1596274448.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-prob-1543207125.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-recaptchalogo-svg-1509131182.warc.gz
data/manual-annotations/archived-raw-serps/warcs/wikimedia-zao-can-bao-zi-jia-meng-www-baidu-com-aaaa-4ws-2018nian-8yue-4ri-21shi-19fen-39miao-1537939782.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-casey-cavert-go-to-www-lovesbo-ru-1642784433.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-claire-dames-1325320932.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-drunk-1325431509.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-escola-sao-luis-1657929903.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-evander-marius-go-to-www-lovesbo-ru-1642716634.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-free-hardcore-porn-videos-japanese-video-1428541087.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-gay-peeing-porn-go-to-www-lovesbo-ru-1643357466.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-gay-whitezilla-go-to-www-lovesbo-ru-1643416941.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-robber-1248193170.warc.gz
data/manual-annotations/archived-raw-serps/warcs/xvideos-solo-1341894522.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-ashland-oregon-1015423512.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-bc-gov-971492707.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-castle-heights-angeles-1611631735.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-civil-code-1658358754.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-dating-sites-1647656764.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-diver-lg-u8180-1620023310.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-dunwells-army-of-friends-1617218301.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-futurama-1314932107.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-jello-gelatin-971145484.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-john-mccain-956454509.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-lawyer-career-information-970996615.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-learn-to-focus-1619373855.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-lm-1376048795.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-metallurgi-1042532635.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-molly-shannon-1640079691.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-region-de-coquimbo-970960857.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-sam-bush-1016625678.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-telekwiaciarnia-pl-1436841282.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-world-greek-kata-periokhe-boreia-amerike-kanadas-1647085990.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yahoo-world-hebrew-hbrh-shlvm-1489427128.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-danses-tv-periodic-1534841786.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-niusha-1515577404.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-speed-force-1535895408.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-speed-force-1535955246.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-speed-force-1535970436.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-speed-force-1535973684.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-speed-force-1536042339.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-speed-force-1536100368.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-spirited-away-animated-film-2001-1524478207.warc.gz
data/manual-annotations/archived-raw-serps/warcs/yandex-virat-swaroop-1523992633.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-ampatuanmassacre-1583309425.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-cardistry-asmr-1577113546.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-chto-budet-esli-sobrat-vse-rezinki-v-bully-1599555287.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-hp-probook-640-g1-razborka-1579996098.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-kak-sdelat-vzryv-v-cinema-4d-1578732009.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-kak-vybit-dushu-moba-1599057231.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-kutyaplya-1561171748.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-lataji-1563086980.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-literatura-6-klass-biografiia-pushkina-1595705363.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-ne-prosto-bekkhen-reaktsiia-1582514079.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-pora-tiulpanov-aktery-1574991226.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-prokhozhdenie-igry-madagaskar-2-chast-4-1562167916.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-pudding-1563068696.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-q2-2017-arizona-west-regional-1547871680.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-razbor-shchetki-pylesosa-samsung-1584184489.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-robloks-mip-siti-2019-1583715307.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-shang-yue-xian-rupu-1582390054.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-uzbekskaia-svadba-v-shymkente-1574145605.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-vasilii-emelianenko-tefteli-1578285143.warc.gz
data/manual-annotations/archived-raw-serps/warcs/youtube-zadnii-most-moskvich-412-ustroistvo-1584150216.warc.gz
docs/queries-tsne-teaser.png
docs/queries-tsne.png
helm/archive-query-log/.gitignore
helm/archive-query-log/.helmignore
helm/archive-query-log/Chart.yaml
helm/archive-query-log/values.yaml
helm/archive-query-log/templates/NOTES.txt
helm/archive-query-log/templates/archive-query-log-config-map.yml
helm/archive-query-log/templates/archive-query-log-cron-job-captures-fetch.yml
helm/archive-query-log/templates/archive-query-log-cron-job-sources-build.yml
helm/archive-query-log/templates/archive-query-log-job-captures-fetch.yml
helm/archive-query-log/templates/archive-query-log-job-sources-build.yml
helm/archive-query-log/templates/archive-query-log-secret.yaml
integrations/ir_datasets/Dockerfile
integrations/ir_datasets/Makefile
integrations/ir_datasets/README.md
integrations/ir_datasets/archive_query_log_ir_datasets_integration.py
integrations/ir_datasets/data/results/part-00000.gz
integrations/ir_datasets/data/serps/part-00000.gz
integrations/tira/.gitignore
integrations/tira/Dockerfile
integrations/tira/README.md
integrations/tira/aql-experiment-baseline.py
integrations/tira/validation-data/results/part-00000.gz
integrations/tira/validation-data/serps/part-00000.gz
notebooks/.gitignore
notebooks/Makefile
notebooks/evaluation_corpus_fields.ipynb
notebooks/evaluation_most_referenced_domains.ipynb
notebooks/evaluation_query_length.ipynb
notebooks/evaluation_results_per_serp.ipynb
notebooks/evaluation_services_overview.ipynb
notebooks/evaluation_time_series.ipynb
notebooks/evaluation_trec_overlap.ipynb
notebooks/evaluation_us_election.ipynb
notebooks/example_corpus_parquet.ipynb
notebooks/obscene_queries.ipynb
notebooks/obscene_words.json
notebooks/pornographic_queries.ipynb
notebooks/process_corpus.ipynb
notebooks/process_stats.ipynb
notebooks/services.txt
notebooks/figures/.gitignore
scripts/create_corpus.py
scripts/create_corpus.sh
scripts/create_url_list.py
scripts/create_url_list.sh
scripts/download_corpus.sh