id
string
truth
string
asr_model
string
librispeech_pool
string
asr_hypothesis
sequence
confidence_scores
sequence
error_labels
sequence
"train-other-500/3679/159363/3679-159363-0006"
"came in at the window while the mother was sitting up in the bed admiring him the queen kissed the child and giving it the name of tom thumb"
"default"
"other"
[ "gaming", "at", "the", "window", "while", "the", "mother", "was", "sitting", "up", "in", "the", "bed", "and", "admiring", "him", "the", "queen", "kiss", "the", "child", "and", "giving", "it", "the", "name", "of", "Tom", "Thumb" ]
[ "0.5387797355651855", "0.8006170392036438", "0.8491834402084351", "0.8887510895729065", "0.9876290559768677", "0.9475099444389343", "0.972603976726532", "1.0", "0.9610720872879028", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.837338387966156", "0.85128253698349", "0.9525017738342285", "0.9876290559768677", "0.9771373867988586", "0.9876290559768677", "0.9820003509521484", "0.9715901017189026", "0.9876290559768677", "0.9876290559768677", "0.8932666778564453", "0.9626659750938416" ]
[ "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/2487/162474/2487-162474-0035"
"and had the mind of a fiend without pity how could he in the mere pursuit of pleasure inflict wholly unnecessary torture as in fox hunting"
"video"
"other"
[ "and", "had", "the", "mind", "of", "a", "fiend", "Without", "Pity", "how", "could", "he", "in", "the", "mere", "pursuit", "of", "pleasure", "inflict", "wholly", "unnecessary", "torture", "as", "in", "fox", "hunting" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9055002927780151", "0.8602157831192017", "0.9128385782241821", "0.8589513897895813", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/5655/46268/5655-46268-0004"
"it shone full across the window of virginia's room but she was dreaming of being chased by bears and only turned uneasily in her sleep the old professor on his way to the kitchen noticed that it seemed strangely light outside"
"default"
"clean"
[ "it", "Shone", "full", "across", "the", "window", "of", "Virginia's", "room", "but", "she", "was", "dreaming", "of", "being", "chased", "by", "bears", "and", "only", "turned", "on", "easily", "in", "her", "sleep", "the", "old", "Professor", "on", "his", "way", "to", "the", "kitchen", "notice", "that", "it", "seems", "strangely", "light", "outside" ]
[ "0.8681945204734802", "0.8521996736526489", "0.9876290559768677", "0.9514707922935486", "0.9795611500740051", "0.9876290559768677", "0.984734296798706", "0.9268601536750793", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9730334281921387", "0.9434484243392944", "0.9691612720489502", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "1.0", "0.816135048866272", "0.8682702779769897", "0.9643746018409729", "0.7638311386108398", "0.9807475805282593", "0.9645640850067139", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "1", "0", "0", "0" ]
"train-clean-360/1112/128136/1112-128136-0037"
"but we should be sorry to think that people of any age could admire such a paraphrase as the following foxes have holes in which to slink for rest the birds of air find shelter in the nest but he the son of man and lord of all"
"video"
"clean"
[ "but", "we", "should", "be", "sorry", "to", "think", "that", "people", "of", "any", "age", "could", "admire", "such", "a", "paraphrase", "as", "the", "following", "foxes", "have", "holes", "in", "which", "to", "slink", "for", "rest", "the", "birds", "of", "are", "find", "shelter", "in", "the", "nest", "but", "he", "the", "son", "of", "man", "and", "Lord", "of", "all" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7218547463417053", "0.9128385782241821", "0.8913527727127075", "0.8117090463638306", "0.9128385782241821", "0.8609391450881958", "0.9128385782241821", "0.8486076593399048", "0.9128385782241821", "0.6830317378044128", "0.721420407295227", "0.6200101375579834", "0.7020069360733032", "0.9128385782241821", "0.6499051451683044", "0.6262571811676025" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/6553/86683/6553-86683-0018"
"you man that came in keep restraint on your tongue said the old woman it is not a great thing for you to keep your mouth shut and your tongue quiet when you get a home and shelter of a hearth on a gloomy winter's night well said the hunter i may do that"
"default"
"clean"
[ "you", "mind", "that", "came", "in", "keeper", "strengthen", "your", "tongue", "so", "the", "old", "woman", "it", "is", "not", "a", "great", "thing", "for", "you", "to", "keep", "your", "mouth", "shut", "and", "your", "tongue", "quiet", "when", "you", "get", "home", "and", "shelter", "of", "a", "heart", "on", "its", "Lumia", "Windows", "night", "well", "said", "the", "hunter", "I", "may", "do", "that" ]
[ "0.9876290559768677", "0.8842920064926147", "0.9753667116165161", "0.9761848449707031", "0.9876290559768677", "0.9876290559768677", "0.8067799806594849", "0.965965747833252", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9726088047027588", "0.9734106063842773", "0.9390935897827148", "0.9452678561210632", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7012288570404053", "0.8871505260467529", "0.9710525274276733", "0.9876290559768677", "0.9876290559768677", "0.9243725538253784", "0.9158138632774353", "0.7929591536521912", "0.7929591536521912", "0.851008415222168", "0.9014337062835693", "0.925896942615509", "0.909906268119812", "0.946442723274231", "0.9876290559768677", "0.9243698120117188", "0.9243698120117188", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "1", "0", "0", "0", "1", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/1061/142358/1061-142358-0041"
"apparently crossed by some thought which jars with the singing perhaps if you had not been already in the secret you might not have guessed what sad memories what warm affection what tender fluttering hopes had their home in this athletic body with the broken finger nails"
"video"
"clean"
[ "apparently", "crossed", "by", "some", "thought", "which", "jars", "with", "the", "singing", "perhaps", "if", "you", "had", "not", "been", "already", "in", "the", "secret", "you", "might", "not", "have", "guessed", "what", "sad", "memories", "what", "warm", "affection", "what", "tender", "fluttering", "hopes", "had", "their", "home", "in", "this", "athletic", "body", "with", "the", "broken", "fingernails" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5389702916145325", "0.7289538979530334", "0.7685444951057434", "0.9128385782241821", "0.9128385782241821", "0.8155283331871033", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7675387859344482", "0.9128385782241821", "0.7581454515457153", "0.8765930533409119", "0.9128385782241821", "0.7097989320755005", "0.7268638014793396", "0.9128385782241821", "0.9128385782241821", "0.7230059504508972", "0.7713768482208252", "0.8398429751396179" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1" ]
"train-clean-360/2256/137627/2256-137627-0029"
"and there he found his shield and showed it to the damosel ah sir said the damosel that same is he that slew your three sons"
"video"
"clean"
[ "and", "there", "he", "found", "his", "shield", "and", "showed", "it", "to", "the", "damsel", "ah", "sir", "said", "the", "damsel", "that", "same", "is", "he", "that's", "Lou", "your", "Three", "Sons" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.6913559436798096", "0.7414964437484741", "0.9128385782241821", "0.7669572830200195", "0.9128385782241821", "0.8575521111488342", "0.9128385782241821", "0.8921946287155151", "0.8520802855491638", "0.3548034429550171", "0.35463660955429077", "0.6096717119216919", "0.7700327038764954", "0.4698445498943329" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "1", "0", "0", "0", "0", "1", "1", "0", "0", "0" ]
"train-other-500/7556/93308/7556-93308-0037"
"i know you would love me again if you would only let yourself won't you forgive me i can't he said briefly have you never done anything that needed to be forgiven i would forgive you anything in the world"
"video"
"other"
[ "I", "know", "you", "would", "love", "me", "again", "if", "you", "would", "only", "let", "yourself", "won't", "you", "forgive", "me", "I", "can't", "he", "said", "briefly", "have", "you", "never", "done", "anything", "that", "needed", "to", "be", "forgiven", "I", "would", "forgive", "you", "anything", "in", "the", "world" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8419718742370605", "0.8419718742370605", "0.8860778212547302", "0.9128385782241821", "0.9128385782241821", "0.9087862968444824", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/7925/271896/7925-271896-0014"
"and sooner or later we must succumb to the inevitable the night was dark and moonless as to and fro i paced on sentry duty my post was a lonely one"
"video"
"other"
[ "and", "sooner", "or", "later", "we", "must", "succumb", "to", "the", "inevitable", "the", "night", "was", "dark", "and", "moonless", "as", "to", "and", "fro", "I", "pasted", "on", "Sentry", "Duty", "my", "post", "was", "alone", "in", "the", "one" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.4862422049045563", "0.7887459993362427", "0.7014397382736206", "0.8767064213752747", "0.7422804236412048", "0.45892342925071716", "0.9108321070671082", "0.8157646656036377", "0.840981125831604", "0.7014397382736206", "0.9128385782241821", "0.8950873017311096" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0" ]
"train-clean-360/7967/117095/7967-117095-0000"
"the martian boy at the feet of the old man stirred his thin reddish feet in the soil and affixed his large green eyes upon the burial hill where the piper stood why does he do that asked the boy"
"video"
"clean"
[ "the", "Martian", "boy", "at", "the", "feet", "of", "the", "old", "man", "stirred", "his", "thin", "reddish", "feet", "in", "the", "soil", "and", "a", "fixed", "his", "large", "green", "eyes", "upon", "the", "burial", "Hill", "where", "the", "piper", "stood", "why", "does", "he", "do", "that", "asked", "the", "boy" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9068438410758972", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7253629565238953", "0.9128385782241821", "0.7468314170837402" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/3409/173540/3409-173540-0000"
"so she decided that it was necessary to prepare the opinion of society she provoked the jealousy of the elderly magnate and told him what she had told her other suitor that is she put the matter so that the only way for him to obtain a right over her was to marry her"
"video"
"other"
[ "so", "she", "decided", "that", "it", "was", "necessary", "to", "prepare", "the", "opinion", "of", "society", "she", "provoked", "the", "jealousy", "of", "the", "elderly", "magnate", "and", "told", "him", "what", "she", "had", "told", "her", "other", "Suitor", "that", "is", "she", "put", "the", "matter", "so", "that", "the", "only", "way", "for", "him", "to", "obtain", "a", "right", "over", "her", "was", "to", "marry", "her" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8919499516487122", "0.7062932252883911", "0.9128385782241821", "0.9128385782241821", "0.8888055682182312", "0.724481463432312", "0.8713022470474243", "0.8999998569488525", "0.7617586255073547", "0.7421078681945801", "0.9128385782241821", "0.9128385782241821", "0.840695321559906", "0.8888055682182312", "0.840695321559906", "0.7328335046768188", "0.9128385782241821", "0.8713022470474243", "0.8999999761581421", "0.768685519695282", "0.892828106880188", "0.9128385782241821", "0.7618708610534668", "0.7686437964439392" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/2042/148298/2042-148298-0033"
"and all with such pretty feastings and merrymakings as could be nowhere within four walls"
"video"
"other"
[ "a", "doll", "with", "such", "pretty", "feasting", "and", "merrymaking", "zaz", "could", "be", "nowhere", "within", "four", "walls" ]
[ "0.7181485295295715", "0.718148410320282", "0.5992921590805054", "0.8681128025054932", "0.7495564818382263", "0.39415937662124634", "0.9028240442276001", "0.3475036919116974", "0.3694104552268982", "0.7245048880577087", "0.5975514054298401", "0.7251026630401611", "0.7214266657829285", "0.7112824320793152", "0.43532589077949524" ]
[ "1", "1", "0", "0", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/7000/83708/7000-83708-0027"
"he contented himself with what he called snicking that's what i call a pretty snick he said the snick in question was a tremendous drive to deep mid off it was stopped quite involuntarily by mister hawthorn and mister hedges"
"default"
"clean"
[ "he", "contented", "himself", "with", "what", "he", "called", "sneaking", "that's", "what", "I", "call", "a", "pretty", "snake", "he", "said", "the", "snake", "in", "question", "was", "a", "tremendous", "drive", "to", "deep", "mid", "off", "it", "was", "stopped", "quite", "involuntarily", "by", "mr.", "Hawthorne", "and", "mr.", "hedges" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7988528609275818", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9595189094543457", "0.9595189094543457", "0.9723705053329468", "0.9876290559768677", "0.964260995388031", "0.549940288066864", "0.831209123134613", "0.8474687933921814", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7902469635009766", "0.9007322788238525", "0.7346932888031006", "0.7904313206672668", "0.9876290559768677", "0.9876290559768677", "0.8476945757865906", "0.9876290559768677", "0.9333814978599548", "0.9761219024658203", "0.7844756841659546", "0.9876290559768677", "0.924476683139801", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "1", "0" ]
"train-other-500/29/130626/29-130626-0005"
"unused he might have been up stuffing himself with eggs and bacon irritating the dog or flirting with the slavey instead of sprawling there sunk in soul clogging oblivion it was a terrible thought"
"video"
"other"
[ "unused", "he", "might", "have", "been", "up", "stuffing", "himself", "with", "eggs", "and", "bacon", "irritating", "the", "dog", "or", "flirting", "with", "the", "slavi", "instead", "of", "spoiling", "their", "sunk", "in", "Seoul", "clogging", "Oblivion", "it", "was", "a", "terrible", "thought" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.6357526779174805", "0.63575279712677", "0.9128385782241821", "0.7679011225700378", "0.7191941142082214", "0.7615648508071899", "0.9128385782241821", "0.8655803203582764", "0.9005319476127625", "0.9128385782241821", "0.804039478302002", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "1", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/1018/135221/1018-135221-0030"
"a land warmed not by the sun but from the earth itself or another cause might be found in the warm ocean currents whatever the true one might be i was utterly unable to form a conjecture but i had no time for such speculations as these"
"video"
"clean"
[ "I'll", "and", "warm", "not", "by", "the", "Sun", "but", "from", "the", "Earth", "itself", "or", "another", "cause", "might", "be", "found", "in", "the", "warm", "ocean", "currents", "whatever", "the", "true", "one", "might", "be", "I", "was", "utterly", "unable", "to", "form", "a", "conjecture", "but", "I", "had", "no", "time", "for", "such", "speculations", "as", "these" ]
[ "0.7230793237686157", "0.90704745054245", "0.9128385782241821", "0.8850884437561035", "0.9128385782241821", "0.8850882649421692", "0.9015903472900391", "0.7304139733314514", "0.771521270275116", "0.9015897512435913", "0.9128385782241821", "0.7772140502929688", "0.9128385782241821", "0.742621123790741", "0.771521270275116", "0.9128385782241821", "0.7715211510658264", "0.8056380748748779", "0.8850884437561035", "0.9128385782241821", "0.9128385782241821", "0.7375790476799011", "0.9044836163520813", "0.9128385782241821", "0.9015897512435913", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7304139733314514", "0.9128385782241821", "0.9015899896621704", "0.7492448687553406", "0.8362555503845215", "0.8206930160522461", "0.9128385782241821", "0.7230793833732605", "0.835327684879303", "0.9113860130310059", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.732473611831665", "0.8206930160522461", "0.8522517681121826", "0.7715216279029846", "0.9128385782241821", "0.9128385782241821" ]
[ "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/1726/142316/1726-142316-0001"
"missus browning the shock had been great margaret fell into a state of prostration which did not show itself in sobs and tears or even find the relief of words"
"video"
"other"
[ "mrs.", "Browning", "the", "shock", "had", "been", "great", "Margaret", "fell", "into", "a", "state", "of", "prostration", "which", "did", "not", "show", "itself", "in", "sobs", "and", "tears", "or", "even", "find", "a", "relief", "of", "words" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0" ]
"train-clean-360/240/160593/240-160593-0026"
"with just the door ajar that oceans are and prayer and that pale sustenance despair"
"video"
"clean"
[ "with", "just", "the", "door", "ajar", "the", "oceans", "are", "and", "prayer", "and", "that", "pale", "sustenance", "despair" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7675513029098511", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/1913/147989/1913-147989-0022"
"overlooked things is she still going with larry donovan oh that's on worse than ever i guess they're engaged tony talks about him like he was president of the railroad everybody laughs about it because she was never a girl to be soft"
"default"
"clean"
[ "overlooked", "thanks", "is", "she", "still", "going", "with", "Larry", "Donovan", "that", "sound", "worse", "than", "ever", "I", "guess", "they're", "engaged", "Tony", "talks", "about", "him", "like", "he", "was", "president", "of", "the", "railroad", "everybody", "last", "about", "it", "because", "she", "was", "never", "a", "girl", "to", "be", "soft" ]
[ "0.8013977408409119", "0.49347469210624695", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9407680034637451", "0.5232656598091125", "0.6502416729927063", "0.9265262484550476", "0.9876290559768677", "0.8027899861335754", "0.9352907538414001", "0.9125717878341675", "0.9864031076431274", "0.9876290559768677", "0.9830678701400757", "0.9383467435836792", "0.9343542456626892", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.5015678405761719", "0.8124181628227234", "0.9243772625923157", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/6254/61725/6254-61725-0008"
"the conversation was carried on in low tones it was known that the commandant of the post was present as well as others in authority and this checked any propensity there might have been for noisy demonstration"
"video"
"other"
[ "the", "conversation", "was", "carried", "on", "in", "low", "tones", "it", "was", "known", "that", "the", "kommandant", "of", "the", "post", "was", "present", "as", "well", "as", "others", "in", "authority", "and", "this", "checked", "any", "propensity", "there", "might", "have", "been", "for", "noisy", "demonstration" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7552191019058228", "0.9128385782241821", "0.83707594871521", "0.9128385782241821", "0.9128385782241821", "0.866148829460144", "0.9128385782241821", "0.9128385782241821", "0.8814480900764465", "0.802876353263855", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/1058/131113/1058-131113-0018"
"and the greeks began to despair of ever subduing it by force and by advice of ulysses resolved to resort to stratagem they pretended to be making preparations to abandon the siege"
"default"
"clean"
[ "and", "the", "Greeks", "began", "to", "despair", "of", "ever", "subduing", "it", "by", "force", "and", "by", "advice", "of", "Ulysses", "resolved", "to", "resort", "to", "stratagem", "they", "pretended", "to", "be", "making", "preparations", "to", "abandon", "The", "Siege" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9636479020118713", "0.9208335280418396", "0.977829098701477", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.5413496494293213", "0.7905259132385254", "0.9876290559768677", "0.9876290559768677", "1.0", "0.9785504341125488", "0.9259021282196045", "0.9876290559768677", "0.9579329490661621", "0.6097143292427063", "0.9831990599632263", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9870409965515137", "0.9876290559768677", "0.9876290559768677", "0.9829249978065491", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "1.0" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/7647/102250/7647-102250-0052"
"was the one all pervading bond of sympathy and agreement"
"video"
"clean"
[ "what's", "the", "one", "all-pervading", "Bond", "of", "sympathy", "and", "agreement" ]
[ "0.9128385782241821", "0.9128385782241821", "0.7251547574996948", "0.8557422757148743", "0.9128385782241821", "0.8121954202651978", "0.7454695701599121", "0.8121381402015686", "0.8841344118118286" ]
[ "1", "0", "0", "1", "0", "0", "0", "0", "0" ]
"train-other-500/8112/281888/8112-281888-0027"
"going through a crowd of gentlemen it is best at a ball to dance only every other dance as over fatigue and probably a flushed face will follow too much dancing"
"video"
"other"
[ "going", "through", "a", "crowd", "of", "gentlemen", "it", "is", "best", "at", "a", "ball", "to", "dance", "only", "every", "other", "dance", "as", "over", "fatigue", "and", "probably", "a", "flushed", "face", "will", "follow", "too", "much", "dancing" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5750316977500916", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8729328513145447", "0.7593781352043152", "0.8934687376022339", "0.9128385782241821", "0.8500496745109558", "0.9128385782241821", "0.7389792203903198", "0.9081001281738281", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8264782428741455" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/815/130105/815-130105-0006"
"the rifle was lowered to a position of caution and the loud soldier came slowly forward he peered into the youth's face that you henry yes it's it's me"
"default"
"clean"
[ "the", "rifle", "was", "lowered", "to", "a", "position", "of", "caution", "and", "Loud", "Soldier", "came", "slowly", "forward", "he", "peered", "into", "the", "youths", "face", "that", "you", "Henry", "yes", "it's", "it's", "me" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.929469645023346", "0.9876290559768677", "0.9832367300987244", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.5228489637374878", "0.9675411581993103", "0.9635805487632751", "0.9876290559768677", "0.9628154039382935", "0.9259874820709229", "0.9876290559768677", "0.9708244800567627", "0.9702975153923035", "0.6478843688964844", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9522522687911987", "0.9568999409675598", "1.0", "1.0", "0.9429847002029419" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8625/295957/8625-295957-0007"
"then all the warriors and the young men picked out their best and fastest horses and made ready to start among those who prepared for the charge was the poor boy on the old dun horse but when they saw him all the rich young braves on their fast horses pointed at him and said oh see"
"video"
"other"
[ "then", "all", "the", "Warriors", "and", "the", "young", "men", "picked", "out", "their", "best", "and", "fastest", "horses", "and", "made", "ready", "to", "start", "among", "those", "who", "prepared", "for", "the", "charge", "was", "the", "poor", "boy", "on", "the", "old", "done", "horse", "but", "when", "they", "saw", "him", "all", "the", "Rich", "Young", "Braves", "on", "their", "fast", "horses", "pointed", "at", "him", "and", "said", "oh", "see" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.719404399394989", "0.9128385782241821", "0.764234721660614", "0.9128385782241821", "0.8748888373374939", "0.9128385782241821", "0.8945150971412659", "0.8119681477546692", "0.7278205752372742", "0.9101690649986267" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/5545/66419/5545-66419-0035"
"and thought with a man's thought unaware that nowhere else would she have chosen to live"
"video"
"other"
[ "and", "thought", "with", "a", "man's", "thought", "unaware", "that", "nowhere", "else", "would", "she", "have", "chosen", "to", "live" ]
[ "0.9128385782241821", "0.9128385782241821", "0.8752250075340271", "0.8752250075340271", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9092674851417542", "0.6340042948722839", "0.7220142483711243", "0.7686935663223267", "0.7265493869781494", "0.7192034721374512", "0.9128385782241821", "0.9128385782241821", "0.8582894802093506" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/705/150468/705-150468-0022"
"and arrested in his presence fully alive now to the impropriety of his conduct and repenting of it rostov with downcast eyes was making his way out of the house"
"video"
"other"
[ "and", "arrested", "in", "his", "presence", "Fully", "Alive", "now", "to", "the", "impropriety", "of", "his", "conduct", "and", "repenting", "of", "it", "roast", "of", "with", "downcast", "eyes", "was", "making", "his", "way", "out", "of", "the", "house" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.711851179599762", "0.5978341102600098", "0.7576640844345093", "0.706102728843689", "0.6081787943840027", "0.8450454473495483", "0.9128385782241821", "0.7425352931022644", "0.6416825652122498", "0.6765013933181763", "0.7508057951927185", "0.9128385782241821", "0.8450454473495483" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/7242/275146/7242-275146-0055"
"she slowly moved away to the door and turned back remembering the packet of letters she took it up paused and looked toward the window the streets still interested him"
"default"
"other"
[ "she", "slowly", "moved", "away", "to", "the", "door", "and", "turned", "back", "a", "memo", "in", "the", "packet", "of", "letters", "she", "took", "it", "up", "post", "and", "looked", "over", "the", "window", "the", "street", "still", "interested" ]
[ "0.7274213433265686", "0.8652735948562622", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8488320112228394", "0.9876290559768677", "0.4953446686267853", "0.4823721945285797", "0.48071712255477905", "0.8558641672134399", "0.9240853786468506", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9695073366165161", "0.9695073366165161", "0.7993296980857849", "0.9421553015708923", "0.8564619421958923", "0.9876290559768677", "0.8655269742012024", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "1", "0", "0", "0", "1", "0", "0" ]
"train-other-500/5000/30597/5000-30597-0048"
"but that disappointment at least was spared me for at night she came into my room knowing of my longing"
"default"
"other"
[ "but", "that's", "appointment", "at", "least", "was", "spared", "me", "fright", "night", "she", "came", "into", "my", "room", "knowing", "of", "my", "lungs" ]
[ "0.951736569404602", "0.7336247563362122", "0.7258293628692627", "0.974955141544342", "0.9876290559768677", "0.9430676102638245", "0.9869129657745361", "0.9760379195213318", "0.4693167507648468", "0.5763800740242004", "1.0", "1.0", "0.8039129972457886", "0.9879381060600281", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7319900989532471" ]
[ "0", "1", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1" ]
"train-clean-360/2201/160438/2201-160438-0027"
"he feels that an artificer is the servant of his customers from whom he derives his subsistence but that a planter who cultivates his own land and derives his necessary subsistence from the labour of his own family is really a master and independent of all the world"
"default"
"clean"
[ "he", "feels", "that", "an", "artificer", "is", "the", "servant", "of", "his", "customers", "from", "whom", "he", "derives", "his", "subsistence", "but", "that", "a", "planter", "who", "cultivates", "his", "own", "land", "and", "derives", "his", "necessary", "subsistence", "from", "the", "labor", "of", "his", "own", "family", "is", "really", "a", "master", "and", "independent", "of", "all", "the", "world" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8491219282150269", "0.8895143866539001", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9467206001281738", "0.9876290559768677", "0.9131017923355103", "0.940984845161438", "0.9876290559768677", "0.9771053791046143", "1.0", "0.9876290559768677", "0.9122912287712097", "0.9876290559768677", "0.9189056158065796", "0.9876290559768677", "0.9387009739875793", "0.9876290559768677", "0.9876290559768677", "0.9542729258537292", "0.9047430157661438", "0.9466156959533691", "0.9731100797653198", "0.9767441749572754", "0.9876290559768677", "0.8068264126777649", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7309713363647461", "0.5591108202934265", "0.518622636795044", "0.9781191349029541", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/7065/74598/7065-74598-0043"
"but perhaps not altogether impossible for the colours of bodies arise from the magnitudes of the particles which reflect them as was explained above now if we conceive these particles of bodies to be so disposed amongst themselves"
"default"
"other"
[ "took", "the", "hats", "not", "altogether", "impossible", "for", "the", "colors", "of", "bodies", "arise", "from", "the", "magnitude", "of", "the", "particles", "which", "reflect", "them", "as", "was", "explained", "above", "know", "if", "we", "can", "save", "these", "particles", "of", "bodies", "to", "be", "so", "disposed", "amongst", "themselves" ]
[ "0.3370622992515564", "0.9592282176017761", "0.6519278287887573", "0.9876290559768677", "0.8615766763687134", "0.9876290559768677", "0.9887291789054871", "0.9723052978515625", "0.9219621419906616", "0.9554607272148132", "0.9600620269775391", "1.0", "0.9876290559768677", "0.9876290559768677", "0.5531437397003174", "0.7718145251274109", "0.9876290559768677", "0.9386990666389465", "0.9876290559768677", "0.9768573641777039", "1.0", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8322513103485107", "0.7885363698005676", "0.9876290559768677", "0.9337053298950195", "0.9342755675315857", "0.9645109176635742", "0.8645198941230774", "0.9275574684143066", "1.0", "0.8563946485519409", "0.8563920259475708", "0.9876290559768677", "0.9764763116836548", "0.9698435664176941", "0.9876290559768677" ]
[ "1", "1", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/1353/134827/1353-134827-0029"
"a little girl was going and coming running to warm herself laughing singing at the top of her voice alas in what are the plays of children not intermingled it was this little girl whom fantine heard singing"
"default"
"other"
[ "a", "little", "girl", "was", "going", "and", "coming", "running", "to", "warm", "herself", "laughing", "singing", "at", "the", "top", "of", "her", "voice", "a", "last", "and", "what", "are", "the", "plays", "of", "children", "not", "intermingled", "it", "was", "this", "little", "girl", "who", "Fontan", "heard", "singing" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9863137602806091", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9779266119003296", "0.9876290559768677", "0.8020117282867432", "0.7998723387718201", "0.8771988749504089", "0.8490155339241028", "0.9305495619773865", "1.0", "0.9504184126853943", "0.9876290559768677", "0.9815748929977417", "0.9896583557128906", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7900912165641785", "0.8115358948707581", "0.781501829624176", "0.9197583794593811", "0.8854993581771851", "0.47088930010795593", "0.890433669090271" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0" ]
"train-clean-360/3728/105398/3728-105398-0004"
"what was there strange in the fact that sir percy should have chosen to use the device as a seal ring he might easily have done that yes quite easily and besides what connection could there be between her exquisite dandy of a husband"
"video"
"clean"
[ "what", "was", "their", "strange", "in", "the", "fact", "that", "sir", "Percy", "should", "have", "chosen", "to", "use", "the", "devices", "as", "seal", "ring", "he", "might", "easily", "have", "done", "that", "yes", "quite", "easily", "and", "besides", "what", "connection", "could", "there", "be", "between", "her", "Exquisite", "Dandy", "of", "a", "husband" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7557292580604553", "0.755729079246521", "0.9128385782241821", "0.9084696173667908", "0.9128385782241821", "0.9128385782241821", "0.8272255063056946", "0.9128385782241821", "0.6972233653068542", "0.6035528779029846", "0.8912856578826904", "0.7325373291969299", "0.9128385782241821", "0.8726999163627625", "0.6999870538711548", "0.7105150818824768", "0.8380675315856934", "0.7279574275016785", "0.9128385782241821", "0.7236838340759277", "0.6970781087875366", "0.9128385782241821", "0.9124112725257874", "0.9128385782241821", "0.9128385782241821", "0.6682460308074951", "0.7105150818824768", "0.6919493675231934", "0.6437233686447144", "0.9128385782241821", "0.7972034215927124", "0.9128385782241821", "0.7223937511444092", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/398/123602/398-123602-0022"
"they cared for nothing and idled me into a fit of sickness but my first mate was a good excellent man with no more idea of being frozen in there until spring than i had so"
"video"
"clean"
[ "they", "cared", "for", "nothing", "and", "I", "told", "me", "into", "a", "fit", "of", "sickness", "but", "my", "first", "mate", "was", "a", "good", "excellent", "man", "with", "no", "more", "idea", "of", "being", "frozen", "in", "there", "until", "spring", "that", "I", "had", "so" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7380351424217224", "0.8667464852333069", "0.9056105017662048", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0" ]
"train-other-500/7556/103535/7556-103535-0020"
"her eager thumb was almost under the flap of the envelope when she hesitated eyed the letter uncertainly and thrust it into the pocket of her calico gown all day it lay there save at times which indeed were of frequent occurrence"
"video"
"other"
[ "hurry", "go", "from", "was", "almost", "under", "the", "flap", "of", "the", "envelope", "when", "she", "hesitated", "Ida", "letter", "uncertainly", "and", "thrust", "it", "into", "the", "pocket", "of", "her", "Calico", "gown", "all", "day", "it", "later", "safe", "at", "times", "which", "indeed", "were", "a", "frequent", "occurrence" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.675108015537262", "0.9128385782241821", "0.9128385782241821", "0.7601001858711243", "0.7197750210762024", "0.7993083000183105", "0.9128385782241821", "0.7729439735412598", "0.853675901889801", "0.9128385782241821", "0.8174905180931091", "0.6663349270820618", "0.7468892931938171", "0.9128385782241821", "0.9128385782241821", "0.6696946620941162", "0.9128385782241821", "0.7468892931938171", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5647017359733582" ]
[ "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "1", "0", "0" ]
"train-other-500/6407/66983/6407-66983-0101"
"and under some form is christ or the soul of christ he is in romeo and juliet in the winter's tale"
"video"
"other"
[ "and", "under", "some", "form", "is", "Christ", "or", "the", "soul", "of", "Christ", "he", "is", "in", "Romeo", "and", "Juliet", "in", "the", "Winter's", "Tale" ]
[ "0.9128385782241821", "0.908393383026123", "0.9040485620498657", "0.9128385782241821", "0.9128385782241821", "0.7455154061317444", "0.9084360599517822", "0.9128385782241821", "0.9128385782241821", "0.8892764449119568", "0.721506655216217", "0.9128385782241821", "0.7661807537078857", "0.9128385782241821", "0.8700999617576599", "0.7394983172416687", "0.704734206199646", "0.9128385782241821", "0.7259796261787415", "0.7185097932815552", "0.7514520287513733" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/6341/64956/6341-64956-0067"
"and i heard john and miss stevens maid phillis talking together it was moonlight you know papa she went on turning her face toward him again and they were out under the trees and john had his arm round her and he was kissing her"
"default"
"clean"
[ "and", "I", "heard", "John", "and", "Miss", "Stephens", "made", "Phyllis", "talkin", "together", "it", "was", "Moonlight", "you", "know", "Papa", "she", "went", "on", "turning", "her", "face", "to", "him", "again", "and", "they", "were", "out", "under", "the", "trees", "and", "John", "had", "his", "arm", "around", "her", "and", "he", "was", "kissing", "her" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7910599708557129", "0.9512823820114136", "0.9876290559768677", "0.8742004632949829", "0.8443005084991455", "0.8603208661079407", "0.491659015417099", "0.9876290559768677", "0.9241390228271484", "0.9876290559768677", "0.9387301206588745", "0.962273359298706", "0.7902071475982666", "0.809981644153595", "0.931797981262207", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.800308108329773", "0.8853344917297363", "0.8754320740699768", "0.9530757069587708", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9640440344810486", "0.9876290559768677", "0.9876290559768677", "0.9664115905761719", "0.9244206547737122", "0.8994327187538147", "0.9639305472373962", "0.9532480239868164", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "1", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0" ]
"train-other-500/3595/176633/3595-176633-0008"
"he was conducted to her bed which he approached in the utmost agitation and perceiving her to all appearance asleep essayed to wake her with a gentle kiss but this method proved ineffectual"
"default"
"other"
[ "he", "was", "conducted", "to", "have", "bad", "when", "she", "approached", "in", "The", "Optimist", "agitation", "deceiving", "her", "to", "order", "pens", "to", "sleep", "to", "wake", "up", "with", "a", "gentle", "care", "but", "this", "message", "proved", "ineffectual" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8411885499954224", "0.9876290559768677", "0.9714447855949402", "0.9876290559768677", "0.9876290559768677", "0.8360073566436768", "0.9876290559768677", "0.7603943943977356", "0.9876290559768677", "0.9127399921417236", "0.8154416680335999", "0.6285102367401123", "0.7847744226455688", "0.7903221845626831", "0.30493882298469543", "0.5080803632736206", "0.8861920237541199", "0.7314539551734924", "0.9779855012893677", "0.9876290559768677", "0.5603539347648621", "0.7945755124092102", "0.8524960279464722", "0.8923940658569336", "0.8606371879577637", "0.9876290559768677" ]
[ "0", "0", "0", "0", "1", "1", "1", "1", "0", "0", "0", "1", "0", "1", "0", "0", "1", "1", "0", "1", "1", "0", "1", "0", "0", "0", "1", "0", "0", "1", "0", "0" ]
"train-clean-360/1322/137588/1322-137588-0019"
"well said the damosel ye are not wise to keep the sword from me for ye shall slay with the sword the best friend that ye have and the man that ye most love in the world and the sword shall be your destruction i shall take the adventure said balin"
"default"
"clean"
[ "wow", "so", "the", "damosel", "you're", "not", "wise", "to", "keep", "the", "sword", "from", "me", "for", "you", "shall", "slay", "with", "the", "sword", "of", "the", "best", "friends", "that", "you", "have", "and", "the", "man", "that", "you", "most", "loved", "in", "the", "world", "and", "the", "sword", "shall", "be", "your", "destruction", "I", "shall", "take", "the", "adventure", "said", "Valen" ]
[ "0.936352014541626", "0.9635066390037537", "0.9876290559768677", "0.8594222664833069", "0.8296345472335815", "0.9243630766868591", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8149117231369019", "0.8125498294830322", "0.8144227266311646", "0.9876290559768677", "0.9876290559768677", "0.7861526012420654", "0.9876290559768677", "0.9876290559768677", "0.5198249816894531", "0.9876290559768677", "0.6644774079322815", "0.8868762850761414", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9745932221412659", "0.9616917967796326", "0.4377342760562897", "0.9876290559768677", "0.7699121832847595", "0.6469618678092957", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.979001522064209", "1.0", "0.9876290559768677", "0.9748697280883789", "0.9625301361083984", "0.820793628692627", "0.4860963821411133" ]
[ "1", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "1", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1" ]
"train-other-500/5890/72188/5890-72188-0029"
"putting on an air of pretended pique babies do you call me a baby"
"default"
"other"
[ "putting", "on", "a", "repetitive", "pic", "babies", "do", "you", "calm", "your", "baby" ]
[ "0.9427939653396606", "0.8083164095878601", "0.7957881093025208", "0.6021417379379272", "0.8171297907829285", "0.8631883859634399", "0.9621852040290833", "1.0", "0.9114504456520081", "0.9054905772209167", "0.9577553272247314" ]
[ "0", "0", "1", "1", "1", "0", "0", "0", "1", "1", "0" ]
"train-other-500/7346/91900/7346-91900-0067"
"but who was moreover guilty of incest and other infamous vices that in their blindness the idolators had nevertheless immolated oxen"
"video"
"other"
[ "but", "who", "was", "moreover", "guilty", "of", "incest", "and", "other", "Infamous", "vices", "that", "in", "their", "blindness", "the", "idolaters", "had", "nevertheless", "immolated", "oxen" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7170182466506958", "0.9128385782241821", "0.8911890387535095", "0.9128385782241821", "0.7853133678436279", "0.9128385782241821", "0.9128385782241821", "0.8980937004089355", "0.734107494354248", "0.9128385782241821", "0.9035480618476868", "0.8785676956176758", "0.8911890387535095", "0.9035479426383972", "0.9128385782241821", "0.7487718462944031", "0.7609058022499084", "0.8533509373664856" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0" ]
"train-clean-360/5740/39910/5740-39910-0043"
"mollie thought her calendar just too pretty for anything and jimmy was sure the new red mittens which maggie had knitted for him with her own chubby wee fingers were the very nicest gayest mittens a boy had ever worn"
"video"
"clean"
[ "Molly", "thought", "her", "calendar", "just", "too", "pretty", "for", "anything", "and", "Jimmy", "was", "sure", "the", "new", "red", "mittens", "which", "Maggie", "had", "knitted", "for", "him", "with", "her", "own", "chubby", "we", "fingers", "with", "the", "very", "nicest", "gayest", "mittens", "a", "boy", "had", "ever", "worn" ]
[ "0.8903725147247314", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7499192953109741", "0.9128385782241821", "0.8577083349227905", "0.7666082978248596", "0.9128385782241821", "0.9128385782241821", "0.834109902381897", "0.7466875314712524", "0.7678101658821106", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7697256207466125", "0.8628568053245544", "0.9128385782241821", "0.8862680196762085", "0.8862680196762085", "0.9128385782241821", "0.814970076084137", "0.7723888158798218", "0.7484170794487", "0.719907820224762", "0.9128385782241821", "0.9128385782241821", "0.9086856842041016", "0.9128385782241821", "0.7447888255119324", "0.9128385782241821", "0.8925805687904358", "0.5554836392402649", "0.9128385782241821", "0.6762770414352417", "0.7167421579360962", "0.9128385782241821", "0.8007978796958923", "0.8695729374885559" ]
[ "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/766/127195/766-127195-0001"
"i snatched in the darkness and my fingers caught a bottle of burgundy for a few minutes there was a tussle the bottle struck the floor and broke"
"video"
"other"
[ "I", "snatched", "up", "the", "darkness", "and", "my", "fingers", "caught", "a", "bottle", "of", "burgundy", "for", "a", "few", "minutes", "there", "was", "a", "tussle", "the", "bottle", "struck", "the", "floor", "and", "broke" ]
[ "0.768072783946991", "0.9128385782241821", "0.9088785648345947", "0.8443617820739746", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.890613853931427", "0.9128385782241821", "0.9128385782241821", "0.9088785648345947", "0.8642802834510803", "0.9128385782241821", "0.7680729031562805", "0.9088784456253052", "0.9128385782241821", "0.8443617820739746", "0.8982768058776855", "0.9128385782241821", "0.8962960243225098", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9088785648345947", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/6037/59234/6037-59234-0006"
"even at the cost of a seemingly inevitable doom of annihilation what starry crowns can history award worthy of that belgian nation and of their king who did not fear to bid them set themselves there as a barrier king albert of belgium"
"default"
"clean"
[ "even", "at", "the", "cost", "of", "a", "seemingly", "inevitable", "tomb", "of", "annihilation", "what", "story", "crowns", "can", "history", "award-worthy", "of", "that", "Belgian", "Nation", "I", "know", "that", "King", "who", "did", "not", "fear", "to", "B", "them", "set", "themselves", "there's", "a", "barrier", "King", "Albert", "of", "Belgium" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9394350051879883", "0.9876290559768677", "0.9676984548568726", "0.9418515563011169", "0.9379527568817139", "0.9579989314079285", "0.9876290559768677", "0.9876290559768677", "0.8034976124763489", "0.8390101194381714", "0.9840219020843506", "0.8506564497947693", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.3222247362136841", "0.9876290559768677", "0.5274173021316528", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9376852512359619", "0.9585219621658325", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0" ]
"train-other-500/5009/29142/5009-29142-0043"
"and was for washington he opened it there was a note saying that enclosed he would please find a bill for the current year's taxes on the seventy five thousand acres of tennessee land belonging to the estate of silas hawkins deceased"
"video"
"other"
[ "and", "was", "for", "Washington", "he", "opened", "it", "there", "was", "a", "note", "saying", "that", "enclosed", "he", "would", "please", "find", "a", "bill", "for", "the", "current", "year's", "taxes", "on", "the", "75", "Thousand", "Acres", "of", "Tennessee", "land", "belonging", "to", "the", "estate", "of", "Silas", "Hawkins", "deceased" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9022412300109863", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8937054872512817", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9004449248313904", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8928852677345276", "0.7286498546600342", "0.9128385782241821", "0.8915484547615051", "0.7236464023590088", "0.854749858379364", "0.8011451363563538", "0.7683852314949036", "0.9128385782241821", "0.7196686863899231", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/1649/68762/1649-68762-0009"
"not all of them there must be some refinement among them but the lawyer was not convinced however it was not his desire to stifle this new born enthusiasm of kenneth's even though he believed it misdirected"
"default"
"clean"
[ "not", "all", "of", "them", "there", "must", "be", "some", "refinement", "among", "them", "but", "the", "lawyer", "was", "not", "convinced", "however", "it", "was", "not", "his", "desire", "to", "stifle", "this", "newborn", "enthusiasm", "of", "Kenneth's", "even", "though", "he", "believed", "it", "was", "directed" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9642602205276489", "0.9767081141471863", "0.9522112011909485", "0.9820881485939026", "0.9569540023803711", "0.9876290559768677", "0.9876290559768677", "0.9794962406158447", "0.9816747307777405", "0.9273161292076111", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.5730618834495544", "0.9876290559768677", "0.8875811100006104", "0.9656469225883484", "0.8062440752983093", "0.5216174125671387", "0.8534602522850037", "0.9642835855484009", "0.9511533379554749", "0.944111704826355", "0.9614162445068359", "0.941198468208313", "0.977159321308136" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1" ]
"train-other-500/3588/180957/3588-180957-0007"
"parched with thirst i caught my father's arm and tried to urge him on toward the blue enchantment of ecstatic living water but to my surprise he staggered back and his face grew as white as the distant snow"
"video"
"other"
[ "parched", "with", "thirst", "I", "caught", "my", "father's", "arm", "and", "tried", "to", "urge", "him", "on", "toward", "the", "blue", "Enchantment", "of", "ecstatic", "living", "water", "but", "to", "my", "surprise", "he", "staggered", "back", "and", "his", "face", "grew", "as", "white", "after", "distance", "no" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.3584540784358978" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1" ]
"train-other-500/366/129285/366-129285-0036"
"and the next vilkins i have got names ready made to the end of the alphabet and all the way through it again when we come to z why you're quite a literary character sir said missus mann well well said the beadle"
"default"
"other"
[ "and", "the", "next", "to", "Phil", "Collins", "I", "have", "gotten", "a", "ready-made", "to", "the", "end", "of", "the", "alphabet", "and", "all", "the", "way", "through", "it", "again", "only", "come", "to", "Z", "why", "you're", "quite", "a", "literary", "character", "sir", "send", "mrs.", "man", "well", "well", "to", "the", "beetle" ]
[ "0.941457211971283", "0.944904625415802", "0.95595782995224", "0.9388803839683533", "0.940915048122406", "0.9409005641937256", "0.9814110398292542", "0.9866093993186951", "0.3981972336769104", "0.4314953684806824", "0.4314953684806824", "0.3070164918899536", "0.9491061568260193", "0.9876290559768677", "0.9876290559768677", "0.7820020914077759", "0.8045641183853149", "0.8046320676803589", "0.8439169526100159", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.6443147659301758", "0.8561609983444214", "0.8024978637695312", "0.9280446767807007", "0.8755284547805786", "0.9339523911476135", "0.9735906720161438", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9854996204376221", "0.7353758811950684", "0.2393610179424286", "0.7885381579399109", "0.9383691549301147", "0.946930468082428", "0.8780709505081177", "0.9100139737129211", "0.2469072937965393" ]
[ "0", "0", "0", "1", "1", "1", "0", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "1", "0", "1" ]
"train-clean-360/5246/5084/5246-5084-0028"
"patting the neck of the beautiful animal on which she rode had not got among the cliffs you would have had little cause for boasting they met as she spoke"
"default"
"clean"
[ "patting", "the", "neck", "of", "the", "beautiful", "animal", "I", "wish", "you", "rode", "had", "not", "got", "among", "the", "cliffs", "you", "would", "have", "had", "little", "cause", "for", "boasting", "they", "met", "as", "she", "spoke" ]
[ "0.5596827864646912", "0.8984301090240479", "0.9751588702201843", "0.9876290559768677", "0.6507294178009033", "0.9437329173088074", "0.8437339067459106", "0.9831296801567078", "0.9199730157852173", "0.9839521050453186", "0.9392441511154175", "0.9747753739356995", "0.960578441619873", "0.9876290559768677", "0.959702730178833", "0.9618368148803711", "0.9616612195968628", "0.9755347967147827", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9652193784713745", "1.0", "0.9710760712623596", "0.9876290559768677", "1.0", "0.9746188521385193", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/5618/48830/5618-48830-0021"
"was almost too dear missus roper's tea table was not pleasant to him"
"video"
"clean"
[ "was", "almost", "too", "dear", "Mrs", "Roper's", "tea-table", "was", "not", "pleasant", "to", "him" ]
[ "0.9128385782241821", "0.6097449064254761", "0.9128385782241821", "0.7136570811271667", "0.9128385782241821", "0.6034660339355469", "0.5677770972251892", "0.9128385782241821", "0.6539145112037659", "0.6158460378646851", "0.7126898169517517", "0.6158460378646851" ]
[ "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0" ]
"train-other-500/5296/26680/5296-26680-0008"
"ahem ahem your lordship on behalf of your mother having come expressly he turned again to david ah now don't be at all alarmed i beg of you i see i have disturbed you she is quite well or was a week or more ago"
"default"
"other"
[ "I", "have", "him", "your", "lordship", "on", "behalf", "of", "your", "mother", "having", "come", "expressly", "he", "turned", "against", "David", "now", "don't", "be", "at", "all", "alarmed", "I", "beg", "of", "you", "I", "see", "I", "have", "Disturbed", "you", "she", "is", "quite", "well", "or", "was", "a", "week", "or", "more", "ago" ]
[ "0.5516884326934814", "0.39791586995124817", "0.6233252882957458", "0.953098475933075", "0.9789507985115051", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9838148951530457", "0.9464449882507324", "0.9779449701309204", "0.9876290559768677", "0.8415130972862244", "0.9876290559768677", "0.8837253451347351", "1.0", "0.9374006390571594", "0.9545380473136902", "0.9637982249259949", "0.9320902228355408", "0.9242839217185974", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9705114364624023", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9643502235412598", "0.9876290559768677", "0.9573566317558289", "0.9753420948982239", "0.9876290559768677", "0.9876290559768677", "0.9640305042266846" ]
[ "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/3779/691/3779-691-0016"
"ashamed of his own thoughts and blamed himself when they kept recurring to him during the last month what do i know about love and women and how can i decide such questions he thought reproachfully"
"video"
"other"
[ "ashamed", "of", "his", "own", "thoughts", "and", "blamed", "himself", "when", "they", "kept", "reoccurring", "to", "him", "during", "the", "last", "month", "what", "do", "I", "know", "about", "love", "and", "woman", "and", "how", "can", "I", "decide", "such", "questions", "he", "thought", "reproachfully" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/3571/173553/3571-173553-0028"
"he said to himself what an incredible event"
"video"
"other"
[ "he", "said", "to", "himself", "what", "an", "incredible", "event" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/6248/302581/6248-302581-0032"
"who were very ferocious and extremely jealous of the territory they had come through years of occupation to regard as their own so it happened that one day late in the afternoon i was returning from an expedition over the plains and"
"default"
"other"
[ "who", "were", "very", "ferocious", "and", "extremely", "jealous", "of", "the", "territory", "they", "had", "to", "come", "through", "years", "of", "occupation", "to", "regard", "as", "their", "own", "so", "they", "happened.", "One", "day", "late", "in", "the", "afternoon", "I", "was", "returning", "from", "an", "expedition", "of", "the", "Plains" ]
[ "0.9876290559768677", "0.5440526604652405", "0.7922009825706482", "0.9694994688034058", "1.0", "0.9640071392059326", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "1.0", "0.9463832378387451", "0.9309159517288208", "0.8221181035041809", "0.9876290559768677", "0.9393091201782227", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9714760184288025", "0.9450058937072754", "0.9581337571144104", "0.9876290559768677", "0.8140870928764343", "0.9252364635467529", "0.9383196234703064", "0.9614925980567932", "0.9030487537384033", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9731739163398743", "0.8643301129341125", "0.9876290559768677", "0.9876290559768677", "0.7889158129692078" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0" ]
"train-clean-360/362/123013/362-123013-0001"
"the river banks were rather high and the bridge instead of rising went across just level so that in the middle if the river was full the water would be nearly up to the woodwork and planks"
"video"
"clean"
[ "the", "Riverbanks", "were", "rather", "High", "and", "the", "bridge", "instead", "of", "rising", "went", "across", "just", "level", "so", "that", "in", "the", "middle", "if", "the", "river", "was", "full", "the", "water", "would", "be", "nearly", "up", "to", "the", "woodwork", "and", "the", "plagues" ]
[ "0.9128385782241821", "0.390051007270813", "0.7216305732727051", "0.7038428783416748", "0.4643435478210449", "0.6848273873329163", "0.9128385782241821", "0.7003255486488342", "0.7170636653900146", "0.7451760172843933", "0.6479485630989075", "0.9128385782241821", "0.6384240984916687", "0.6384240984916687", "0.7394275665283203", "0.9128385782241821", "0.5121684670448303", "0.9128385782241821", "0.7038429379463196", "0.5121684670448303", "0.9128385782241821", "0.9128385782241821", "0.7460629940032959", "0.5630766153335571", "0.5718745589256287", "0.9128385782241821", "0.537465512752533", "0.9128385782241821", "0.639079749584198", "0.728460431098938", "0.9128385782241821", "0.9128385782241821", "0.7216305732727051", "0.8115250468254089", "0.6848273873329163", "0.9128385782241821", "0.7247481346130371" ]
[ "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1" ]
"train-clean-360/2652/157644/2652-157644-0022"
"as from other entries in mister moore's account book we know that two ounces cost him one shilling we may wonder what missus moore was going to do with her half ounce"
"default"
"clean"
[ "as", "for", "mother", "entries", "in", "Mr", "Moore's", "account", "book", "we", "know", "that", "2", "oz", "costume", "one", "shilling", "we", "may", "wonder", "what", "mrs.", "Moore", "was", "going", "to", "do", "with", "her", "half", "ounce" ]
[ "0.8559457063674927", "0.48306336998939514", "0.32090258598327637", "0.9735273718833923", "0.9719681143760681", "0.5551761388778687", "0.8369475603103638", "0.9815949201583862", "0.976246178150177", "0.9876290559768677", "0.9876290559768677", "0.9503815174102783", "0.9503815174102783", "0.9876290559768677", "0.956303060054779", "0.9467501044273376", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9133892059326172", "0.9876290559768677", "0.9771881699562073", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9346728920936584" ]
[ "0", "1", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/404/132015/404-132015-0050"
"put on a livery get up behind the carriage and come with me while i make some calls here prince hippolyte spluttered and burst out laughing long before his audience which produced an effect unfavorable to the narrator"
"video"
"other"
[ "put", "on", "a", "livery", "get", "up", "behind", "the", "carriage", "and", "come", "with", "me", "while", "I", "make", "some", "calls", "here", "principal", "eat", "spluttered", "and", "burst", "out", "laughing", "long", "before", "his", "audience", "which", "produced", "an", "effect", "unfavorable", "to", "the", "narrator" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7487568259239197", "0.9128385782241821", "0.7955409288406372", "0.904179573059082", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.763863742351532", "0.9128385782241821", "0.9128385782241821", "0.9041799306869507", "0.8514775037765503", "0.851477324962616", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8250/286289/8250-286289-0028"
"but the colours are gayer the fourth member of the party is a lad of some fifteen years old his figure which is naked to the waist is of a pure grecian model the muscles showing up clearly beneath the skin"
"default"
"other"
[ "but", "the", "colors", "are", "gay", "the", "fourth", "member", "of", "the", "party", "is", "allowed", "if", "some", "15", "years", "old", "is", "figure", "which", "is", "naked", "to", "the", "waist", "is", "a", "vacuole", "accretion", "model", "the", "muscles", "showing", "up", "clearly", "beneath", "the", "skin" ]
[ "0.906909167766571", "0.9876290559768677", "0.9876290559768677", "0.9754547476768494", "0.9859352111816406", "0.972415566444397", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.6082165837287903", "0.608216404914856", "0.4331943690776825", "0.44110599160194397", "1.0", "0.6397807002067566", "0.7845863699913025", "1.0", "0.7884157299995422", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.5576140284538269", "0.5576140284538269", "0.9876290559768677", "0.9718042612075806", "0.9356299042701721", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/3118/5909/3118-5909-0020"
"of respect and contempt"
"default"
"clean"
[ "of", "respect", "and", "contempt" ]
[ "0.9850539565086365", "1.0", "0.9876290559768677", "0.986049473285675" ]
[ "0", "0", "0", "0" ]
"train-other-500/4174/69293/4174-69293-0000"
"chapter nine local affairs"
"default"
"other"
[ "tap", "29", "local", "affairs" ]
[ "0.29783788323402405", "0.30881571769714355", "0.8028468489646912", "0.8027202486991882" ]
[ "1", "1", "0", "0" ]
"train-other-500/6211/60544/6211-60544-0038"
"the botanist beside me dreams i know of vindications for that woman all this world before us and its order and liberty are no more than a painted scene before which he is to meet her at last freed from that scoundrel"
"video"
"other"
[ "the", "button", "is", "beside", "me", "dreams", "I", "know", "of", "indications", "for", "that", "woman", "all", "this", "world", "before", "us", "and", "it's", "order", "and", "Liberty", "are", "no", "more", "than", "a", "painted", "seen", "before", "which", "he", "is", "to", "meet", "her", "at", "last", "freed", "from", "that", "scoundrel" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7248892784118652", "0.9128385782241821", "0.7325892448425293", "0.7240051627159119", "0.7201652526855469", "0.9128385782241821", "0.8727502226829529", "0.7714969515800476", "0.9128385782241821", "0.7532129883766174", "0.9128385782241821", "0.7224091291427612", "0.7558400630950928", "0.9128385782241821", "0.7115858197212219", "0.7395402789115906", "0.7707650661468506", "0.5438359975814819", "0.7219434976577759", "0.9128385782241821", "0.9128385782241821", "0.7287070155143738", "0.9128385782241821", "0.8820071816444397", "0.9128385782241821", "0.8325297236442566", "0.788113534450531", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7257137298583984", "0.7257137298583984", "0.9128385782241821", "0.5333195328712463", "0.8820071816444397", "0.6076077222824097", "0.5284157395362854", "0.7565654516220093" ]
[ "0", "1", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/2504/154289/2504-154289-0061"
"which was pronounced superior to all the porter in the world even to the famous highland usquebaugh or whisky"
"video"
"other"
[ "which", "was", "pronounced", "Superior", "to", "all", "the", "porter", "in", "the", "world", "even", "to", "the", "famous", "Highland", "Escobar", "or", "whiskey" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7227398157119751", "0.823596179485321", "0.8638884425163269", "0.8662649393081665", "0.9128385782241821", "0.9128385782241821", "0.770852267742157", "0.9128385782241821", "0.6598852872848511", "0.7558597922325134", "0.5807011723518372", "0.7462642192840576" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "1" ]
"train-other-500/7346/91900/7346-91900-0000"
"has retained the throne of tartarus they have both been more lucky than their brother jupiter who had to suffer specially the vicissitudes of fortune this third son of saturn who after the fall of his sire assumed the sovereignty of the heavens"
"video"
"other"
[ "has", "retained", "the", "Throne", "of", "Tartarus", "they", "have", "both", "been", "more", "lucky", "than", "their", "brother", "Jupiter", "who", "had", "to", "suffer", "specially", "the", "vicissitudes", "of", "Fortune", "this", "third", "son", "of", "Saturn", "who", "after", "the", "fall", "of", "his", "sire", "assumed", "the", "sovereignty", "of", "the", "heavens" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.69553542137146", "0.8709372282028198", "0.9128385782241821", "0.9128385782241821", "0.7334033250808716", "0.6935431361198425", "0.9128385782241821", "0.8772082328796387", "0.6285603046417236", "0.9128385782241821", "0.9128385782241821", "0.7338302135467529", "0.7318071722984314", "0.719011127948761", "0.9128385782241821", "0.6034621596336365", "0.7236188650131226", "0.7236188650131226", "0.5182586908340454" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/5712/48842/5712-48842-0018"
"you must be very careful in wrapping yourself as you go downstairs said bell who stood by the tray on which she had brought up the toast and tea the cold is what you would call awful i should call it jolly said lily if i could get up and go out"
"default"
"clean"
[ "you", "must", "be", "very", "careful", "and", "wrapping", "yourself", "as", "you", "go", "downstairs", "said", "Bell", "who", "stood", "by", "the", "tray", "on", "which", "he", "had", "brought", "up", "the", "toast", "and", "tea", "the", "cold", "is", "what", "you", "would", "call", "awful", "I", "should", "call", "it", "Jolly", "said", "Lily", "if", "I", "could", "get", "up", "and", "go", "out" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9283115863800049", "0.9563257098197937", "0.9876290559768677", "0.9876290559768677", "0.5625318884849548", "0.49462220072746277", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.7897418737411499", "0.8909056186676025", "0.9274511933326721", "0.8661656975746155", "0.9876290559768677", "0.9876290559768677", "0.9508641958236694", "0.8059686422348022", "0.9876290559768677", "0.9442673921585083", "0.9876290559768677", "0.9876290559768677", "0.9145570993423462", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9393931031227112", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9340532422065735", "0.9241837859153748", "0.8788103461265564", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/727/124443/727-124443-0112"
"and especial when his mind is disturbed"
"video"
"other"
[ "and", "especially", "when", "his", "mind", "is", "Disturbed" ]
[ "0.7501130104064941", "0.4317464530467987", "0.861461341381073", "0.6824289560317993", "0.9128385782241821", "0.525173008441925", "0.6714275479316711" ]
[ "0", "1", "0", "0", "0", "0", "0" ]
"train-other-500/3261/154309/3261-154309-0065"
"france said toline quietly is an english province chief city"
"video"
"other"
[ "France", "it", "totaling", "quietly", "is", "an", "English", "Province", "Chief", "City" ]
[ "0.9128385782241821", "0.9128385782241821", "0.3726557493209839", "0.6488503813743591", "0.9128385782241821", "0.7246946692466736", "0.6213340759277344", "0.7658728361129761", "0.9128385782241821", "0.7260680198669434" ]
[ "0", "1", "1", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/2790/142824/2790-142824-0078"
"beat all together until well mixed then pour it into the dish over the jam and bake for an hour in a moderate oven"
"default"
"clean"
[ "it", "all", "together", "until", "well", "mixed", "and", "import", "into", "the", "dish", "over", "the", "Chatham", "and", "bake", "for", "an", "hour", "in", "the", "motor", "to", "oven" ]
[ "0.790342390537262", "0.9243550300598145", "0.9244647026062012", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8907057642936707", "0.5526237487792969", "0.9859459400177002", "0.7980731725692749", "1.0", "0.9762216210365295", "0.9876290559768677", "0.8778420090675354", "0.9671061635017395", "0.9813978672027588", "0.9876290559768677", "0.9598279595375061", "0.9876290559768677", "0.5539683103561401", "0.5539683103561401", "0.5539683103561401", "0.5539683103561401", "0.4266432523727417" ]
[ "1", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0" ]
"train-clean-360/166/126302/166-126302-0016"
"no madam is it tom no madam is it jemmy it is not can your name be"
"video"
"clean"
[ "no", "Madam", "is", "it", "Tom", "no", "Madam", "is", "it", "jemmy", "it", "is", "not", "can", "your", "name", "be" ]
[ "0.9128385782241821", "0.8989161849021912", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8838005661964417", "0.9128385782241821", "0.9085026979446411", "0.8894521594047546", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/608/123194/608-123194-0002"
"by one of those impish happenings which are like a dream turned upside down been writing any more stories lately inquired mister harrison genially one evening when anne was taking tea with him and missus harrison"
"video"
"other"
[ "buy", "one", "of", "those", "impish", "happenings", "which", "are", "like", "a", "dream", "turned", "upside", "down", "been", "writing", "any", "more", "stories", "lately", "inquired", "mr.", "Harrison", "Gene", "only", "one", "evening", "when", "an", "was", "taking", "tea", "with", "him", "and", "mrs.", "Harrison" ]
[ "0.6996995210647583", "0.9128385782241821", "0.7401785850524902", "0.7652766108512878", "0.7915397882461548", "0.8893468976020813", "0.9128385782241821", "0.9128385782241821", "0.8699794411659241", "0.7272750735282898", "0.7487115263938904", "0.9128385782241821", "0.9092919230461121", "0.7013121247291565", "0.9128385782241821", "0.7733074426651001", "0.7733752131462097", "0.7729681134223938", "0.8983461856842041", "0.9009308218955994", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7266793251037598", "0.7233266234397888", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7271416187286377", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.761030375957489", "0.9128385782241821", "0.7705702781677246", "0.7571389675140381", "0.7431833744049072" ]
[ "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "1", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0" ]
"train-other-500/3400/153975/3400-153975-0007"
"could easily instruct her workmen how to provide her eagerness to see so strange a sight as the ascent of a human being into the sky overcame any scruples of conscience that she might have otherwise felt"
"default"
"other"
[ "could", "easily", "instructor", "work", "Manhattan", "provide", "her", "eagerness", "to", "see", "so", "strange", "human", "being", "into", "the", "sky", "overcame", "any", "Scruples", "of", "conscience", "that", "she", "might", "have", "otherwise" ]
[ "0.9876290559768677", "0.9586314558982849", "0.9383566379547119", "0.923930287361145", "0.9239303469657898", "0.9504592418670654", "0.9907796382904053", "0.9385022521018982", "0.9279342889785767", "0.9410897493362427", "0.9442442059516907", "0.9455666542053223", "0.8974590301513672", "0.933122992515564", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.2575043737888336", "0.8123273849487305", "0.8627461791038513", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/85/121551/85-121551-0078"
"that until death they may both watch and sleep beside that spouse who every vow accepts which charity conformeth to his pleasure"
"video"
"other"
[ "but", "until", "death", "they", "may", "both", "watch", "and", "sleep", "beside", "that", "spouse", "who", "every", "vow", "accepts", "which", "charity", "can", "for", "math", "to", "his", "pleasure" ]
[ "0.7148945927619934", "0.9016371369361877", "0.8118104934692383", "0.9128385782241821", "0.8208494186401367", "0.7583872675895691", "0.7763880491256714", "0.9128385782241821", "0.7679312825202942", "0.7583872675895691", "0.7583873867988586", "0.8924956321716309", "0.9128385782241821", "0.9128385782241821", "0.8376044034957886", "0.9122905135154724", "0.885958731174469", "0.9128385782241821", "0.4841618835926056", "0.49341264367103577", "0.48548293113708496", "0.6553730368614197", "0.7621728181838989", "0.6668992638587952" ]
[ "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0" ]
"train-other-500/720/173578/720-173578-0001"
"i therefore do denounce all amorous writing except in such a way as not to attract plain simple short and by no means inviting but with a moral to each error tack'd"
"default"
"other"
[ "hi", "there", "for", "dude", "announce", "whole", "Amorous", "writing", "except", "in", "such", "a", "way", "as", "not", "to", "attract", "plain", "simple", "short", "and", "by", "no", "means", "invite", "but", "with", "a", "moral", "to", "each", "are", "attacked" ]
[ "1.0", "0.9876290559768677", "0.9729024767875671", "0.9876290559768677", "0.481128066778183", "0.7943255305290222", "0.6959169507026672", "0.8898845911026001", "0.5061526298522949", "0.5061526298522949", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9384483098983765", "0.9235143661499023", "0.9542402625083923", "0.9876290559768677", "0.8642885684967041", "0.9876290559768677", "0.9720025062561035", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9466471076011658", "0.9896969795227051", "0.9876290559768677", "0.9876290559768677", "0.9358317852020264", "0.9586864709854126", "0.9403911828994751", "0.8918045163154602", "0.8528160452842712" ]
[ "1", "1", "1", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1" ]
"train-clean-360/4010/10822/4010-10822-0025"
"i came into the world to show him i am a king because he sent me to bear witness to his truth and i bear it kill me and i will rise again you can kill me but you cannot hold me dead death is my servant"
"video"
"clean"
[ "I", "came", "into", "the", "world", "to", "show", "him", "I", "am", "a", "king", "because", "he", "sent", "to", "me", "to", "Bear", "witness", "to", "his", "truth", "and", "I", "bear", "it", "kill", "me", "and", "I", "will", "rise", "again", "you", "can", "kill", "me", "but", "you", "cannot", "hold", "me", "dead", "death", "is", "my", "servant" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/337/123025/337-123025-0010"
"so please help me to mount like a good friend as you are there was no more to be said he placed her carefully on the saddle looked to the bit and curb gave the reins gently into her hand and then mounted me"
"video"
"clean"
[ "so", "please", "help", "me", "to", "Mount", "like", "a", "good", "friend", "as", "you", "are", "there", "was", "no", "more", "to", "be", "said", "he", "placed", "her", "carefully", "on", "the", "saddle", "looked", "to", "the", "bit", "and", "curb", "gave", "the", "reins", "gently", "into", "her", "hand", "and", "then", "mounted", "me" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/1607/150715/1607-150715-0032"
"departed long since to enrich the barbarians with our treasures and now returns with her savage allies to contaminate"
"default"
"clean"
[ "departed", "long", "since", "to", "enrich", "The", "Barbarians", "with", "our", "treasures", "and", "now", "returns", "with", "her", "Savage", "allies", "to", "contaminate" ]
[ "0.958601713180542", "0.9486281275749207", "0.5991039276123047", "0.9602839946746826", "0.9642739295959473", "0.982586681842804", "0.963403582572937", "0.9716522693634033", "0.9876290559768677", "0.9876290559768677", "0.9755743741989136", "0.84783935546875", "0.970113217830658", "0.9876290559768677", "0.9291343688964844", "0.944237470626831", "0.938239336013794", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8367/279367/8367-279367-0026"
"the southern attack was spreading along the whole front and it was made with unexampled vigor it even excelled the fiery rush at stone river and the generals on both sides were largely the same that had fought the earlier great battle polk the bishop general"
"default"
"other"
[ "the", "southern", "attack", "was", "spreading", "along", "the", "whole", "front", "and", "it", "was", "made", "with", "unexampled", "bigger", "it", "even", "excelled", "the", "fiery", "Rush", "at", "Stone", "River", "and", "the", "generals", "on", "both", "sides", "were", "largely", "the", "same", "that", "it", "fought", "the", "earlier", "great", "battle", "Coke", "the", "bishop", "General" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9387556314468384", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9760048389434814", "0.6524345278739929", "0.7878979444503784", "0.9416936635971069", "0.8805038332939148", "0.9631825685501099", "0.9496567845344543", "0.8874161243438721", "0.9262253046035767", "0.9876290559768677", "0.9876290559768677", "0.9582719802856445", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9572483897209167", "0.9412009119987488", "0.9236776828765869", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8365545868873596", "0.9513811469078064", "0.966917097568512", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0" ]
"train-other-500/1230/139225/1230-139225-0032"
"and mister john paley i can reply to that in a sentence nothing that is to their discredit they are two thoroughly estimable and trustworthy gentlemen so far as we are aware"
"video"
"other"
[ "I'm", "mr.", "John", "Paley", "I", "can", "reply", "to", "that", "in", "a", "sentence", "nothing", "that", "is", "to", "the", "discredit", "they", "are", "to", "subtly", "estimable", "and", "trustworthy", "gentleman", "so", "far", "as", "we", "are", "aware" ]
[ "0.8621739149093628", "0.8621739149093628", "0.9128385782241821", "0.8718357682228088", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8998449444770813", "0.9128385782241821", "0.9100639820098877", "0.769696831703186", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7864570617675781", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.904963493347168", "0.9128385782241821", "0.9128385782241821", "0.90761399269104", "0.9128385782241821", "0.5563545227050781", "0.6346551775932312", "0.9128385782241821", "0.9128385782241821", "0.7237732410430908", "0.9128385782241821", "0.7088499069213867", "0.5252753496170044" ]
[ "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8466/285142/8466-285142-0031"
"as if on the four winds i have a garden of my own but so with roses overgrown and lilies that you would it guess to be a little wilderness and all the spring time of the year it only loved to be there"
"video"
"other"
[ "thus", "if", "on", "the", "four", "wines", "I", "have", "a", "garden", "of", "my", "own", "but", "so", "with", "roses", "overgrown", "and", "Lilies", "that", "you", "would", "it", "guess", "to", "be", "a", "little", "Wilderness", "and", "all", "the", "springtime", "of", "the", "year", "it", "only", "loved", "to", "be", "there" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.605928897857666", "0.8033000230789185", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.6496663093566895", "0.8855574727058411", "0.9128385782241821", "0.872459352016449", "0.9128385782241821", "0.7290054559707642", "0.9040791392326355", "0.8139275908470154", "0.9128385782241821", "0.7716816663742065", "0.7620817422866821", "0.9128385782241821", "0.8139275908470154", "0.9128385782241821", "0.7263129949569702", "0.7233636379241943", "0.9128385782241821", "0.6504200100898743", "0.842968761920929", "0.7270337343215942", "0.842968761920929", "0.9128385782241821", "0.8139275908470154", "0.7479671835899353" ]
[ "1", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/7737/114039/7737-114039-0004"
"in this letter which by the bye is very friendly in tone"
"default"
"other"
[ "in", "this", "letter", "which", "by-the-bye", "is", "very", "friendly", "in", "tone" ]
[ "0.8395357728004456", "0.9239141941070557", "0.9362821578979492", "0.8961091637611389", "0.8961091637611389", "0.9015855193138123", "0.9481039047241211", "0.9876290559768677", "0.7955423593521118", "0.8740373849868774" ]
[ "0", "0", "0", "0", "1", "0", "0", "0", "0", "0" ]
"train-other-500/2405/148581/2405-148581-0015"
"gave ralph the impression of having been done over by the same hand he was smoother broader more supremely tailored and his whole person exhaled the faintest whiff of an expensive scent"
"video"
"other"
[ "gave", "Ralphie", "impression", "of", "having", "been", "done", "over", "by", "the", "same", "hand", "he", "was", "smoother", "broader", "more", "supremely", "tailored", "and", "his", "whole", "person", "exhaled", "the", "faintest", "whiff", "of", "an", "expensive", "scent" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.6731106042861938" ]
[ "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/253/124400/253-124400-0015"
"although i have been accustomed to think lowly enough of myself and although when i look with the eye of a philosopher at the varied courses and pursuits of mankind at large i find scarcely one which does not appear in vain and useless"
"video"
"other"
[ "although", "I", "have", "been", "accustomed", "to", "think", "lowly", "enough", "of", "myself", "and", "although", "when", "I", "look", "with", "the", "eye", "of", "a", "philosopher", "at", "the", "very", "causes", "and", "Pursuits", "of", "mankind", "at", "large", "I", "find", "scarcely", "one", "which", "does", "not", "appear", "in", "vain", "and", "useless" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5149943232536316", "0.7432230114936829", "0.7338324189186096", "0.7519640326499939", "0.7284926176071167", "0.7071487903594971", "0.9128385782241821", "0.9128385782241821", "0.7491461038589478", "0.7284926176071167", "0.8648757338523865", "0.7593795657157898", "0.7404221296310425", "0.8987591862678528", "0.5352020859718323", "0.9128385782241821", "0.6999009847640991", "0.5257279872894287", "0.7259483337402344", "0.9128385782241821", "0.7653255462646484", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7259483337402344", "0.7496729493141174", "0.6999009847640991", "0.9128385782241821", "0.6937806010246277", "0.6453348994255066" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/4172/185480/4172-185480-0055"
"as they say of john brown mouldering in his grave judge of the astonishment and delight of all paris at his reappearance in his native city in precisely the same costume and carriage as formerly"
"default"
"other"
[ "as", "they", "save", "John", "Brown", "moldering", "in", "his", "grave", "judge", "of", "the", "establishment", "into", "light", "of", "Old", "Paris", "and", "his", "reappearance", "in", "his", "native", "City", "in", "precisely", "the", "same", "costume", "in", "Carriage", "as", "formerly" ]
[ "1.0", "0.9876290559768677", "0.7885730862617493", "0.911773681640625", "0.9095360040664673", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9757053852081299", "0.9868197441101074", "0.9876290559768677", "0.9332324266433716", "0.6536325812339783", "0.8464751839637756", "0.7250444889068604", "0.6445425152778625", "0.9535929560661316", "0.8149804472923279", "0.9239555597305298", "0.8070715069770813", "0.9270466566085815", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9762728810310364", "0.27563363313674927", "0.7880539298057556", "0.938171923160553", "0.9029938578605652" ]
[ "0", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "1", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0" ]
"train-other-500/5287/39165/5287-39165-0031"
"and necessary to mention and yet it is a very delicate affair to speak of fanny opened her eyes and said that she hoped that nothing was wrong no my dear i think nothing is wrong"
"default"
"other"
[ "is", "necessary", "to", "mention", "and", "yes", "it", "is", "a", "very", "delicate", "to", "speak", "of", "funny", "open", "her", "eyes", "on", "set.", "She", "hopes", "that", "nothing", "was", "wrong", "no", "my", "dad", "I", "think", "nothing", "is", "wrong" ]
[ "0.8162180185317993", "0.9422138929367065", "0.8154072761535645", "0.9876290559768677", "0.8143574595451355", "0.7643505930900574", "0.9617010354995728", "0.9508451819419861", "0.7815748453140259", "0.8783549666404724", "0.9876290559768677", "0.8567609190940857", "0.9097638130187988", "0.7895963191986084", "0.8233099579811096", "0.3364129960536957", "0.5113523006439209", "0.9728817939758301", "0.566725492477417", "0.5640897750854492", "0.8927482962608337", "0.9728115200996399", "0.9822272062301636", "0.9695971012115479", "0.9319639205932617", "0.9701188206672668", "0.8117112517356873", "0.9594414830207825", "0.9594414830207825", "0.7834110856056213", "0.8585255146026611", "0.9681100845336914", "0.9335779547691345", "0.9525070190429688" ]
[ "1", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "1", "1", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0" ]
"train-clean-360/157/126796/157-126796-0038"
"a man hung up like a dog then you might save me from this ignominious death said birch springing to his feet and catching the dragoon by the arm and oh what will i not give you in reward"
"default"
"clean"
[ "a", "man", "hung", "up", "like", "a", "dog", "then", "you", "might", "save", "me", "from", "this", "ignoramus", "death", "submerge", "spring", "to", "his", "feet", "and", "catching", "the", "Dragoon", "by", "the", "arm", "and", "know", "what", "will", "I", "not", "give", "you", "and", "reward" ]
[ "0.8921866416931152", "0.42751070857048035", "0.9263544082641602", "0.9409253001213074", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8751033544540405", "0.869722306728363", "0.869722306728363", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9599246978759766", "0.9876290559768677", "0.9876290559768677", "0.9183244109153748", "0.9547891020774841", "0.22681103646755219", "0.9725438356399536", "0.9312846064567566", "0.9876290559768677", "0.9876290559768677", "0.9450986385345459", "0.9876290559768677", "0.7883146405220032", "0.7894120216369629" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0" ]
"train-other-500/366/127793/366-127793-0027"
"and i shall be as much king of my realm as any other of his and being so i should do as i liked and doing as i liked i should please myself and pleasing myself i should be content"
"default"
"other"
[ "and", "I", "shall", "be", "as", "much", "king", "of", "my", "realm", "as", "any", "other", "of", "his", "and", "being", "so", "I", "should", "do", "as", "I", "liked", "and", "doing", "as", "I", "like", "I", "should", "please", "myself", "and", "pleasing", "myself", "I", "should", "be", "content" ]
[ "0.8678075075149536", "0.8678075075149536", "0.8264053463935852", "0.927977979183197", "0.9240063428878784", "0.9876290559768677", "0.8590112924575806", "0.9245095252990723", "0.9185998439788818", "0.7874597907066345", "0.8591075539588928", "0.9876290559768677", "0.9534282684326172", "0.9876290559768677", "0.9876290559768677", "0.9386263489723206", "0.9611208438873291", "0.9728595018386841", "0.9775500893592834", "0.9876290559768677", "0.9876290559768677", "0.9567013382911682", "0.9625682830810547", "0.9876290559768677", "0.9613403081893921", "0.9876290559768677", "0.5389944911003113", "0.8979684710502625", "0.9347783327102661", "0.9876290559768677", "0.9876290559768677", "0.9578043818473816", "0.9876290559768677", "0.9671761989593506", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/7001/12337/7001-12337-0145"
"i made up my mind at once mister ashurst i said looking up from my keyboard i can give you this girl's name and then you can insert the proviso immediately you can"
"default"
"other"
[ "I", "made", "up", "my", "mind", "at", "once", "is", "a", "stressed", "I", "said", "looking", "up", "for", "my", "keyboard", "I", "can", "give", "you", "this", "girl's", "name", "and", "then", "you", "can", "insert", "the", "Proviso", "immediately", "you", "can" ]
[ "0.9876290559768677", "0.9876290559768677", "0.952796459197998", "0.9876290559768677", "0.9129257798194885", "0.8773931264877319", "0.9176539778709412", "0.5233942866325378", "0.6279353499412537", "0.5961694121360779", "0.9552486538887024", "0.9876290559768677", "0.9462592601776123", "0.9393808841705322", "0.9301726818084717", "0.9876290559768677", "0.9739624261856079", "0.88551926612854", "0.931056797504425", "0.9876290559768677", "0.9838557243347168", "0.9876290559768677", "0.9757136702537537", "0.9876290559768677", "1.0", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9711399078369141", "0.9268923401832581", "0.9280176162719727" ]
[ "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8033/104515/8033-104515-0041"
"then he smashed down his hat upon his head and left the room"
"default"
"other"
[ "then", "he", "smashed", "down", "his", "head", "upon", "his", "head", "and", "left", "the", "room" ]
[ "0.9499433636665344", "0.9405184984207153", "0.9729862213134766", "0.9242950677871704", "0.8500192165374756", "0.8944582939147949", "0.8973494172096252", "0.8869794011116028", "0.9198837280273438", "0.980972409248352", "0.9876290559768677", "0.5224797129631042", "0.9680446982383728" ]
[ "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/8771/294523/8771-294523-0046"
"for a short time he took lessons in oil painting from a german he soon found that he had not the eye nor the hand for the work but it happened that the teacher's father had been a soldier in the army of frederick the great and as soon as walter found this out"
"default"
"clean"
[ "for", "short", "time", "he", "took", "lessons", "in", "oil", "painting", "from", "a", "German", "he", "soon", "found", "that", "he", "had", "not", "the", "I", "nor", "the", "hand", "for", "the", "work", "but", "it", "happens", "that", "the", "teachers", "father", "had", "been", "a", "soldier", "in", "the", "army", "of", "Frederick", "the", "great", "and", "as", "soon", "as", "Walter", "found", "this", "out" ]
[ "0.9061295390129089", "0.7854540348052979", "0.8052241802215576", "0.8734906911849976", "0.9876290559768677", "0.9876290559768677", "0.6505581140518188", "0.8834662437438965", "0.9876290559768677", "0.9876290559768677", "0.9558830857276917", "0.9413155913352966", "0.966598391532898", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9798754453659058", "1.0", "0.8468396663665771", "0.9744873046875", "0.9876290559768677", "0.9671272039413452", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9509117007255554", "0.9876290559768677", "0.9680991768836975", "0.9786584377288818", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8648936152458191", "0.9759083986282349", "0.9876290559768677", "0.7681041955947876" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/2299/6525/2299-6525-0061"
"the pianola has been erected by rennick he is a good fellow and one feels for him much at such a time"
"default"
"clean"
[ "the", "pianola", "has", "been", "erected", "by", "renick", "he", "is", "a", "good", "fellow", "and", "one", "feels", "for", "him", "much", "at", "such", "a", "Time" ]
[ "0.9868413209915161", "0.857490599155426", "0.9341284036636353", "0.9242430329322815", "0.9876290559768677", "0.975887656211853", "0.596365213394165", "0.7868078947067261", "0.7867836952209473", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8891527056694031", "0.9799990057945251", "0.9876290559768677", "0.9876290559768677", "0.9811293482780457", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/2026/22756/2026-22756-0016"
"the pony came trotting round the corner of the street looking as obstinate as pony might and picking his steps as if he were spying about for the cleanest places and would by no means dirty his feet or hurry himself inconveniently"
"video"
"other"
[ "the", "pony", "came", "trotting", "around", "the", "corner", "of", "the", "street", "looking", "as", "obstinate", "as", "Pony", "might", "and", "picking", "his", "steps", "as", "if", "he", "were", "spying", "about", "for", "the", "cleanest", "places", "and", "would", "by", "no", "means", "dirty", "his", "feet", "or", "hurry", "himself", "in", "conveniently" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5734513998031616", "0.7151581645011902", "0.828755795955658", "0.9128385782241821", "0.7666340470314026", "0.7539482712745667", "0.9128385782241821", "0.7264890670776367", "0.7264890670776367", "0.7485272884368896", "0.7710164189338684", "0.7666341662406921", "0.9128385782241821", "0.6716857552528381", "0.9040371775627136", "0.7234039306640625", "0.9128385782241821", "0.706632673740387", "0.7666340470314026", "0.9128385782241821", "0.6617952585220337", "0.9128385782241821", "0.828755795955658", "0.7162132859230042", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5194618105888367", "0.5289554595947266" ]
[ "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1" ]
"train-other-500/1621/141599/1621-141599-0043"
"to put the whole case in the fewest possible words"
"video"
"other"
[ "put", "the", "whole", "case", "in", "the", "fewest", "possible", "words" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/4189/115685/4189-115685-0002"
"from then on i want every man to disappear from sight and to travel under cover and keep your ultrophones open and tuned on ten four seven six wilma and i had received our battle equipment from the gear boss"
"video"
"other"
[ "from", "then", "on", "I", "want", "every", "man", "to", "disappear", "from", "sight", "and", "to", "travel", "under", "cover", "and", "keep", "your", "Ultra", "phones", "open", "and", "tuned", "on", "10", "476", "Wilmer", "and", "I", "had", "received", "our", "battle", "equipment", "from", "the", "gear", "boss" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.7279630899429321", "0.9128385782241821", "0.8317571878433228", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5986914038658142", "0.5922175049781799", "0.5307053327560425", "0.6903831958770752", "0.7051836252212524", "0.9128385782241821", "0.5701964497566223", "0.9128385782241821", "0.9128385782241821", "0.6939429640769958", "0.9128385782241821", "0.9128385782241821", "0.7261698246002197", "0.7067399621009827" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/288/130994/288-130994-0033"
"may serve as a standard the state of agriculture and the populousness of a country have been considered as nearly connected with each other and as a rule for the purpose intended numbers in the view of simplicity and certainty are entitled to a preference"
"default"
"clean"
[ "my", "service", "is", "standard", "the", "state", "of", "Agriculture", "and", "a", "population", "of", "a", "country", "has", "been", "considered", "as", "nearly", "connected", "with", "each", "other", "and", "as", "a", "rule", "for", "the", "purpose", "intended", "numbers", "in", "the", "view", "of", "Simplicity", "and", "certainty", "are", "entitled", "to", "a", "preference" ]
[ "0.805535078048706", "0.7966353297233582", "0.7966369986534119", "0.9263131618499756", "0.8102902173995972", "0.8102899789810181", "0.9646723866462708", "0.9831097722053528", "0.7995683550834656", "0.8509378433227539", "0.8209333419799805", "0.5935999751091003", "0.9861285090446472", "0.9618100523948669", "0.22857779264450073", "0.6733403205871582", "0.7888525128364563", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "1.0", "0.8507181406021118", "0.8938379287719727", "0.9490782022476196", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677" ]
[ "1", "1", "1", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8753/297946/8753-297946-0032"
"with his snaky neck and he came a little more out of the hole and said of course i am the fairy prince everybody knows that i've been a fairy prince for ever and ever so long"
"default"
"other"
[ "with", "his", "sneaky", "neck", "and", "it", "came", "a", "little", "more", "out", "of", "the", "hole", "instead", "of", "false", "I", "am", "a", "fatty", "Prince", "everybody", "knows", "that", "I", "have", "benefited", "friends", "forever", "and", "ever", "so", "long" ]
[ "0.9876290559768677", "0.9627341628074646", "0.9627716541290283", "0.9420050382614136", "0.9876290559768677", "0.9625586867332458", "0.9888373613357544", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8928897976875305", "0.5973252058029175", "0.8145609498023987", "0.3954780101776123", "0.807912290096283", "0.7987039685249329", "0.9876290559768677", "0.30130454897880554", "0.7813953161239624", "1.0", "0.9741504788398743", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.8477879762649536", "0.652137815952301", "0.9258655309677124", "0.9618423581123352", "0.9037244319915771", "0.8435763120651245", "0.986057460308075" ]
[ "0", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "1", "1", "1", "1", "0", "0", "0", "0" ]
"train-other-500/1260/139271/1260-139271-0024"
"to set a form upon desired change as i'll myself disgrace knowing thy will i will acquaintance strangle and look strange be absent from thy walks and in my tongue thy sweet beloved name no more shall dwell"
"default"
"other"
[ "to", "set", "a", "full", "map", "on", "desire", "to", "change", "cuz", "I", "owe", "myself", "disgrace", "knowing", "if", "I", "will", "I", "will", "acquaintance", "strangle", "and", "look", "strange", "be", "absent", "from", "my", "works", "I", "did", "my", "tongue", "by", "sweet", "beloved", "name", "no", "more", "shall", "dwell" ]
[ "0.9401835799217224", "0.9583292007446289", "0.8895373940467834", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9004888534545898", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9125316143035889", "0.9767526388168335", "0.9189010858535767", "0.9876290559768677", "0.9876290559768677", "0.837326169013977", "0.9243645071983337", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9012325406074524", "0.9012308120727539", "0.9876290559768677", "0.852628767490387", "0.6061583161354065", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.850388765335083", "0.9590260982513428", "0.9876290559768677", "0.8643454313278198", "0.9876290559768677", "0.9545976519584656", "0.9732677936553955", "0.9876290559768677" ]
[ "0", "0", "0", "1", "1", "1", "1", "1", "0", "1", "0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "1", "0", "1", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8240/116288/8240-116288-0062"
"what made you first mistrust i asked why remember how curiously o'brien acted when we hunted the robe how indifferent he was how he used dialect"
"default"
"other"
[ "what", "made", "you", "first", "mistrust", "I", "asked", "why", "remember", "how", "curiously", "O'Brien", "acted", "when", "we", "hunted", "the", "robe", "how", "indifferent", "he", "was", "how", "he", "uses", "dialect" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9464093446731567", "0.9115217328071594", "0.8199968934059143", "0.772811233997345", "0.8490878939628601", "0.8182610869407654", "0.9335237741470337", "0.9696387052536011", "0.9666366577148438", "0.9876290559768677", "0.9635433554649353", "0.9876290559768677", "0.9696594476699829", "0.9876290559768677", "0.9473503828048706", "1.0", "0.9096425771713257", "0.8607743382453918", "0.9874039888381958", "0.9876290559768677", "0.9876290559768677", "0.5900720357894897", "0.8991534113883972" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0" ]
"train-clean-360/8498/287359/8498-287359-0010"
"i'm interested in airships and i'll consider it a favor if you'll let me look yours over while it's here tom readily agreed and a few minutes later he had caught a trolley going into the city he was soon in one of the largest jewelry stores of chester"
"default"
"clean"
[ "I'm", "interested", "in", "airships", "and", "I'll", "consider", "it", "a", "favor", "if", "you", "let", "me", "look", "yours", "over", "while", "a", "tear", "Tom", "readily", "agreed", "and", "a", "few", "minutes", "later", "he", "had", "caught", "a", "trolley", "going", "into", "the", "city", "he", "was", "soon", "in", "one", "of", "the", "largest", "jewelry", "stores", "of", "Chester" ]
[ "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9391165375709534", "0.9876290559768677", "0.5142275094985962", "0.8153603672981262", "0.9876290559768677", "0.9876290559768677", "0.9050870537757874", "0.9876290559768677", "0.9509931802749634", "0.8228079080581665", "0.6437790989875793", "0.9617846012115479", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9263423681259155", "0.9876290559768677", "0.9876290559768677", "0.9660794734954834", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "1.0", "0.9704382419586182", "0.9876290559768677", "1.0", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9506740570068359", "0.924368143081665", "1.0", "0.939068615436554", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9496612548828125", "0.9744089841842651", "0.9876290559768677" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/8631/281208/8631-281208-0032"
"and send help to the miserable whenever he pleased i forgot not to lift up my heart in thankfulness to heaven and what heart could forbear to bless him who had not only in a miraculous manner provided for me in such a wilderness"
"video"
"other"
[ "and", "send", "help", "to", "the", "miserable", "whenever", "he", "pleased", "I", "forgot", "not", "to", "lift", "up", "my", "heart", "in", "thankfulness", "to", "heaven", "and", "what", "heart", "could", "forbear", "to", "bless", "him", "who", "had", "not", "only", "in", "a", "miraculous", "manner", "provided", "for", "me", "in", "such", "a", "wilderness" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8852821588516235", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.8990066051483154", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9127584099769592", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.9044936299324036" ]
[ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-clean-360/6492/68369/6492-68369-0027"
"and is nearest cairo it covers thirteen acres of ground and is four hundred and fifty feet high my first sight of it was a disappointment for after all it is nothing but a pile of stone"
"default"
"clean"
[ "and", "his", "nearest", "Cairo", "it", "covers", "13", "acres", "of", "ground", "and", "is", "450", "feet", "high", "my", "first", "side", "of", "it", "was", "a", "disappointment", "for", "after", "all", "it", "is", "nothing", "but", "a", "pile", "of", "stone" ]
[ "0.9425044059753418", "0.7349774241447449", "0.9876290559768677", "0.9005128741264343", "0.9841432571411133", "0.9876290559768677", "0.36566853523254395", "0.9876290559768677", "0.5698396563529968", "0.9876290559768677", "1.0", "0.9562325477600098", "0.830069899559021", "0.5366397500038147", "0.9616522192955017", "0.9876223206520081", "0.9738461971282959", "0.8465766906738281", "0.9876290559768677", "0.9743384718894958", "0.9723386168479919", "0.9876290559768677", "0.9876290559768677", "0.9484284520149231", "0.9119681715965271", "0.9313098788261414", "0.9849764108657837", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9311321377754211" ]
[ "0", "1", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]
"train-other-500/1636/141789/1636-141789-0058"
"was too costly for the munificence or too trifling for the attention of his gracious and splendid host on the fifteenth of february james paid a farewell visit to versailles"
"video"
"other"
[ "was", "too", "costly", "for", "the", "magnificence", "or", "to", "traveling", "for", "the", "attention", "of", "his", "gracious", "and", "Splendid", "host", "on", "the", "15th", "of", "February", "James", "Bond", "A", "Farewell", "visit", "to", "Versailles" ]
[ "0.8019100427627563", "0.9128385782241821", "0.7448924779891968", "0.7660241723060608", "0.9128385782241821", "0.8979460597038269", "0.9128385782241821", "0.6346498727798462", "0.8256982564926147", "0.9128385782241821", "0.7226043343544006", "0.7100432515144348", "0.9128385782241821", "0.6774219274520874", "0.7226043343544006", "0.7273454666137695", "0.9128385782241821", "0.7092008590698242", "0.9128385782241821", "0.9042252898216248", "0.9128385782241821", "0.5747445821762085", "0.7277814745903015", "0.8893204927444458", "0.7268946766853333", "0.9128385782241821", "0.7632193565368652", "0.7105489373207092", "0.9128385782241821", "0.6724916696548462" ]
[ "0", "0", "0", "0", "0", "1", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0" ]
"train-other-500/7749/8591/7749-8591-0095"
"no he replied think you left it behind at that place last night yes did you rangon popped in with a twinkle i went through all my pockets again no cigarette case"
"video"
"other"
[ "no", "he", "replied", "thank", "you", "left", "it", "behind", "at", "the", "place", "last", "night", "yes", "did", "you", "rank", "and", "popped", "in", "with", "a", "twinkle", "I", "went", "through", "all", "my", "pockets", "again", "no", "cigarette", "case" ]
[ "0.9128385782241821", "0.9128385782241821", "0.9128385782241821", "0.5742611885070801", "0.6966071724891663", "0.9128385782241821", "0.7708262801170349", "0.6662878394126892", "0.748788595199585", "0.9128385782241821", "0.8143312335014343", "0.8873188495635986", "0.6246742010116577", "0.9128385782241821", "0.8515283465385437", "0.6473367214202881", "0.6689071655273438", "0.6696716547012329", "0.9128385782241821", "0.7231894731521606", "0.7495530843734741", "0.9128385782241821", "0.6609983444213867", "0.9128385782241821", "0.9128385782241821", "0.7308014631271362", "0.7231894731521606", "0.6492207050323486", "0.9128385782241821", "0.623765230178833", "0.6492207050323486", "0.9128385782241821", "0.7231894731521606" ]
[ "0", "0", "0", "1", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ]

RED-ACE

Dataset Summary

This dataset can be used to train and evaluate ASR Error Detection or Correction models. It was introduced in the RED-ACE paper (Gekhman et al., 2022).

The dataset contains ASR outputs on the LibriSpeech corpus (Panayotov et al., 2015) with annotated transcription errors.

Dataset Details

The LibriSpeech corpus was decoded using Google Cloud Speech-to-Text API, with the default and video models. The word-level confidence was enabled and is provided as part of the transcription hypothesis. To annotate word-level errors (for the error detection task), the hypothesis words were aligned with the reference (correct) transcription to find an edit path (insertions, deletions and substitutions) with the minimum edit distance (from the hypothesis to the reference). The hypothesis words with deletions and substitutions were then labeled as ERROR (1), the rest were labeled as NOTERROR (0).

Data format

The dataset has train, development and test splits, which correspond to the splits in LibriSpeech.

The data contains JSON lines with the following keys (note that asr_hypothesis[i], confidence_scores[i] and error_labels[i] correspond to the same word):

  • "id" - The librispeech id.
  • "truth" - The reference (correct) transcript from Librispeech.
  • "asr_model" - The ASR model used for transcription.
  • "librispeech_pool" - Corresponds to the original pool (split) in the LibriSpeech data.
  • "asr_hypothesis" - The transcription hypothesis.
  • "confidence_scores" - The word-level confidence scores provided as part of the transcription hypothesis.
  • "error_labels" - The error labels (1 error, 0 not error) that were obtained by aligning the hypothesis and the reference.

Here is an example of a single data item:

{
  "id": "test-other/6070/86744/6070-86744-0024",
  "truth": "my dear franz replied albert when upon receipt of my letter you found the necessity of asking the count's assistance you promptly went to him saying my friend albert de morcerf is in danger help me to deliver him",
  "asr_model": "default",
  "librispeech_pool": "other",
  "asr_hypothesis": ["my", "dear", "friends", "replied", "Albert", "received", "my", "letter", "you", "found", "the", "necessity", "of", "asking", "the", "county", "assistance", "you", "promptly", "went", "to", "him", "saying", "my", "friend", "all", "but", "the", "most", "stuff", "is", "in", "danger", "help", "me", "to", "deliver", "it"],
  "confidence_scores": ["0.9876290559768677", "0.9875272512435913", "0.6921446323394775", "0.9613730311393738", "0.9413103461265564", "0.6563355922698975", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "1.0", "1.0", "1.0", "1.0", "1.0", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.9876290559768677", "0.5291957855224609", "0.5291957855224609"],
  "error_labels": ["0", "0", "1", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "1", "1", "0", "0", "0", "0", "0", "0", "0", "1"]
}

Loading the dataset

The following code loads the dataset and locates the example data item from above:

from datasets import load_dataset

red_ace_data = load_dataset("google/red_ace_asr_error_detection_and_correction", split='test')

for example in red_ace_data:
  if example['id'] == 'test-other/6070/86744/6070-86744-0024':
    break
print(example)  

Citation

If you use this dataset for a research publication, please cite the RED-ACE paper (using the bibtex entry below), as well as the LibriSpeech paper mentioned above.

@inproceedings{gekhman-etal-2022-red,
    title = "{RED}-{ACE}: Robust Error Detection for {ASR} using Confidence Embeddings",
    author = "Gekhman, Zorik  and
      Zverinski, Dina  and
      Mallinson, Jonathan  and
      Beryozkin, Genady",
    booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
    month = dec,
    year = "2022",
    address = "Abu Dhabi, United Arab Emirates",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.emnlp-main.180",
    doi = "10.18653/v1/2022.emnlp-main.180",
    pages = "2800--2808",
    abstract = "ASR Error Detection (AED) models aim to post-process the output of Automatic Speech Recognition (ASR) systems, in order to detect transcription errors. Modern approaches usually use text-based input, comprised solely of the ASR transcription hypothesis, disregarding additional signals from the ASR model. Instead, we utilize the ASR system{'}s word-level confidence scores for improving AED performance. Specifically, we add an ASR Confidence Embedding (ACE) layer to the AED model{'}s encoder, allowing us to jointly encode the confidence scores and the transcribed text into a contextualized representation. Our experiments show the benefits of ASR confidence scores for AED, their complementary effect over the textual signal, as well as the effectiveness and robustness of ACE for combining these signals. To foster further research, we publish a novel AED dataset consisting of ASR outputs on the LibriSpeech corpus with annotated transcription errors.",
}
Downloads last month
3
Edit dataset card
Evaluate models HF Leaderboard