Datasets:
path
string
| audio
audio
| transcription
string
| translation
string
| lang_id
class label
1 classes
|
---|---|---|---|---|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134079.mp3" | ".هذه ملحوظة مهمة للغاية" | "That’s a very important note." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134080.mp3" | "من اخترع هذه الآلة ؟" | "Who invented this machine?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134081.mp3" | "ليس لديه بيت ليعيش فيه." | "He doesn't have a house to live in." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134082.mp3" | "ماذا ستفعل في عطلة نهاية الأسبوع ؟" | "What are you doing this weekend?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134096.mp3" | "نحن جوعانين." | "We're hungry." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134097.mp3" | ".ما زال ذراعي يؤلمني" | "My arm still hurts me." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134099.mp3" | "أتى ولدي إلى غرفتي." | "My son came to my room." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134100.mp3" | ".كيوتو ليست كبيرة كأوساكا" | "Kyoto isn't as big as Osaka." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134106.mp3" | ".افتُتحت المستشفى الشهر الماضي" | "The hospital opened in the last month." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134107.mp3" | "أتحب الرياضة ؟" | "Do you like sports?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134108.mp3" | "لا يهتم أحد برأيك." | "No one cares in your opinion." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134109.mp3" | "اِسمح لي أن أعرّفك بمايوكو." | "Let me introduce you to Miyoko." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134110.mp3" | "يحب أخي مشاهدة أفلام الرعب." | "My brother likes to watch horror movies." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134121.mp3" | "خيبت أملي فيك." | "I'm disappointed in you." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134123.mp3" | ".سيصل الاخرون في غضون دقائق معدودة" | "Other people will arrive in a few minutes." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134124.mp3" | "هل تحب ذلك ؟" | "Do you like that?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134125.mp3" | "إنها تعيش في راحة." | "She lives in comfort." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134237.mp3" | "عليّ أن أشتري واحدة." | "I have to buy one." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134241.mp3" | "لم لا تصغي إلي ؟" | "Why won't you listen to me?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134581.mp3" | ".أُصيبت كايت بالبرد" | "Kate had a cold." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134582.mp3" | ".هدفي أن أصبح طبيباً" | "My goal is to become a doctor." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134583.mp3" | "هل قابلتها ؟" | "Have you met her?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19134585.mp3" | "هل لديك أي مجلات يابانية ؟" | "Do you have any Japanese magazines?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171010.mp3" | "ما رأيك أن تأخذ استراحة ؟" | "How about that you take a break?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171011.mp3" | "إن لعب كرة المضرب ممتع." | "Playing tennis is fun." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171012.mp3" | "هل ستذهب إلى أمريكا السنة القادمة ؟" | "Are you going to America next year?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171013.mp3" | "هذه رسالة مهمة." | "This is an important message." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171014.mp3" | "بوب هو الذي ضربني و ليس هي." | "Bob is the one who beat me and not her." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171016.mp3" | "سأحبك إلى الأبد." | "I'll love you forever." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171017.mp3" | "أين الشاي مع الحليب ؟" | "Where's the tea with the milk?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171018.mp3" | "إن الطقس حار للغاية." | "The weather is very hot." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171019.mp3" | "عمّن تتكلم ؟" | "Who are you talking about?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171025.mp3" | "قررا الزواج الشهر المقبل." | "They decided to marry next month." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171026.mp3" | "الحياة جميلة." | "Life is beautiful." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171027.mp3" | "هل لاحظت كيف كان ينظر إليّ ؟" | "Did you notice how he was looking at me?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171028.mp3" | ".قاد توم السيارة" | "Tom drove the car." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171035.mp3" | "أعطيته كل ما كان عندي من المال." | "I gave him all I had of money." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171037.mp3" | "يمكنها أن تقود السيارة." | "She can drive the car." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171042.mp3" | "أحب أن آكل البطيخ." | "I like to eat the watermelon." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19171044.mp3" | "أنت تعرف الإجابة ، أليس كذلك ؟" | "You know the answer. Isn't it?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19185868.mp3" | ".تعال إلى منزلي" | "Come to my home." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19185869.mp3" | "هل انت بالغ؟" | "Are you an adult?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19185870.mp3" | ".الأمل ليس خطةً" | "Hope is not a plan." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19185872.mp3" | "من معي ؟" | "Who is this?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190472.mp3" | "ما جنسيته ؟" | "What's his nationality?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190473.mp3" | "أعرّفك بصديقي." | "let me introduce you to my friend." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190474.mp3" | "وافق المدرس على خطته." | "The teacher has approved his plan." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190475.mp3" | "رفضت دعوته." | "She declined his invitation." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190476.mp3" | "إخلع خفيك من فضلك." | "Pease, take off your slippers." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190483.mp3" | "إنه يتكلم الصينية بطلاقة." | "He speaks Chinese fluently." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190484.mp3" | ".سمعنا ذلك من فلان و فلان" | "We heard that from person to others." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190485.mp3" | ".سقطت الشجرة" | "The tree fell." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190487.mp3" | ".هذا أجمل" | "That's beautiful." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190490.mp3" | ".يمكنها ان تَعُدَّ من الواحد إلى العشرة" | "She can count from one to ten." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190491.mp3" | "كان بيل في اليابان." | "Bill was in Japan." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190492.mp3" | ".أبي مدرّس" | "My father is a teacher." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190493.mp3" | "استرح." | "Relax." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190494.mp3" | "ترك الباب مفتوحاً." | "The door is left open." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190495.mp3" | "العشب بحاجة إلى التجديب." | "The grass needs cutting." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190496.mp3" | "لن أنساك أبداً." | "I'll never forget you." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190497.mp3" | ".لا تقلق ، أنا هنا الآن" | "Don't worry, I'm here now." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190498.mp3" | "هذا أصغر من ذاك." | "This is smaller than that." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190499.mp3" | "عليك البدء في الحال." | "You have to start immediately." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190501.mp3" | "أنا سعيد لرؤيتك مجدداً." | "I'm glad to see you again." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190513.mp3" | "هل الحقيبة السوداء ملكك ؟" | "Is this black bag yours?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190514.mp3" | ".ليس مهماً" | "It's not important." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190516.mp3" | ".عليك أن تعمل بجهد أكبر" | "You have to work harder." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190517.mp3" | ".عددهم كبير" | "There's a lot of them." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190518.mp3" | ".أنا ولد" | "I am a boy" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190520.mp3" | ".هذا الشبل من ذاك الأسد" | "This cub from that lion." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190521.mp3" | ".لم يشعل النار مع أن الجو كان بارداً" | "He didn't light the fire, though the weather was cold." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190522.mp3" | "دمر الحريق المدينة." | "The fire destroyed the city." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190523.mp3" | "ما اسم تلك السمكة بالإنجليزية ؟" | "What’s the name of this fish in English?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190524.mp3" | ".صرخ طالباً النجدة" | "He shouted asking for help." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190525.mp3" | "أبريل هو موسم الضرائب في الولايات المتحدة" | "April is the tax season in the United States." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190526.mp3" | "إنه لطيف جداً." | "He's very nice." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190532.mp3" | "أين المصرف ؟" | "Where is the bank?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190533.mp3" | "و كيف لي أن أعرف ؟" | "How can I know?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190534.mp3" | ".ما من طريقة أخرى" | "There's no other way." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190535.mp3" | "على الشرطة ألا تقبل الرشاوي." | "The police shouldn't accept bribes." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190537.mp3" | "إنك متعب ، أليس كذلك ؟" | "You're tired, isn't it?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190538.mp3" | "لديه الكثير من المال في البنك." | "He has a lot of money in the bank." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190539.mp3" | ".كان ذلك أمله الوحيد و الأخير" | "This was his only and last hope." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190540.mp3" | "هل تعرف من يكون ؟" | "Do you know who he is?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19190541.mp3" | "أسبح مرةً في الأسبوع." | "I swim once a week." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19203347.mp3" | "لا تنس تذكرة الدخول." | "Don't forget the entry ticket." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19203348.mp3" | "بما أنك لديك حمى ، من الأفضل لك أن تبقى في المنزل." | "As you have a fever, It's best for you to stay at home." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19203349.mp3" | "سأحوّل مبلغ المال." | "I'll transfer this amount of money." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19203351.mp3" | "هذا كتاب جيد." | "This is a good book." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206752.mp3" | "ألديك أقلام كثيرة ؟" | "Have many pens?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206753.mp3" | "أبدأ لن أشك في صدقه." | "I will never doubt his honesty." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206754.mp3" | "الكل يوافقك الرأي." | "All agree, with you." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206755.mp3" | "الليمون حامض." | "Lemon is sour." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206756.mp3" | ".أُفضل العمل وحدي" | "I prefer to work alone." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206757.mp3" | "هاجمنا العدو ليلاً." | "The enemy attacked us at night." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206759.mp3" | ".إنه ذكي" | "He's smart." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206760.mp3" | ".أسكن في أوساكا" | "I live in Osaka." | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206761.mp3" | "هل بإمكانك السباحة ؟" | "Can you swim?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206762.mp3" | "أين مكان عملك ؟" | "Where is your place of business?" | 0
(ar.en) |
|
"/storage/hf-datasets-cache/all/datasets/13051298887778-config-parquet-and-info-google-xtreme_s-3c4cdbc9/downloads/extracted/30b794cac1082bb5ce529257e1301aea5a7ec5ffd6241930039c4b75a87b8e45/clips/common_voice_ar_19206763.mp3" | "هناك رجل غريب أمام المنزل." | "There's a strange man in front of the house." | 0
(ar.en) |
XTREME-S
The Cross-lingual TRansfer Evaluation of Multilingual Encoders for Speech (XTREME-S) benchmark is designed to evaluate speech representations across languages, tasks, domains and data regimes. It covers 102 languages from 10+ language families, 3 different domains and 4 task families: speech recognition, translation, classification and retrieval.
TL;DR: XTREME-S is the first speech benchmark that is diverse, fully accessible, and reproducible. All datasets can be downloaded with a single line of code. An easy-to-use and flexible fine-tuning script is provided and actively maintained.
XTREME-S covers speech recognition with Fleurs, Multilingual LibriSpeech (MLS) and VoxPopuli; speech translation with CoVoST-2; speech classification with LangID (Fleurs) and intent classification (MInDS-14); and finally speech(-text) retrieval with Fleurs. Each of the tasks covers a subset of the 102 languages included in XTREME-S, from various regions:
- Western Europe: Asturian, Bosnian, Catalan, Croatian, Danish, Dutch, English, Finnish, French, Galician, German, Greek, Hungarian, Icelandic, Irish, Italian, Kabuverdianu, Luxembourgish, Maltese, Norwegian, Occitan, Portuguese, Spanish, Swedish, Welsh
- Eastern Europe: Armenian, Belarusian, Bulgarian, Czech, Estonian, Georgian, Latvian, Lithuanian, Macedonian, Polish, Romanian, Russian, Serbian, Slovak, Slovenian, Ukrainian
- Central-Asia/Middle-East/North-Africa: Arabic, Azerbaijani, Hebrew, Kazakh, Kyrgyz, Mongolian, Pashto, Persian, Sorani-Kurdish, Tajik, Turkish, Uzbek
- Sub-Saharan Africa: Afrikaans, Amharic, Fula, Ganda, Hausa, Igbo, Kamba, Lingala, Luo, Northern-Sotho, Nyanja, Oromo, Shona, Somali, Swahili, Umbundu, Wolof, Xhosa, Yoruba, Zulu
- South-Asia: Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Nepali, Oriya, Punjabi, Sindhi, Tamil, Telugu, Urdu
- South-East Asia: Burmese, Cebuano, Filipino, Indonesian, Javanese, Khmer, Lao, Malay, Maori, Thai, Vietnamese
- CJK languages: Cantonese and Mandarin Chinese, Japanese, Korean
Design principles
Diversity
XTREME-S aims for task, domain and language diversity. Tasks should be diverse and cover several domains to provide a reliable evaluation of model generalization and robustness to noisy naturally-occurring speech in different environments. Languages should be diverse to ensure that models can adapt to a wide range of linguistic and phonological phenomena.
Accessibility
The sub-dataset for each task can be downloaded with a single line of code as shown in Supported Tasks. Each task is available under a permissive license that allows the use and redistribution of the data for research purposes. Tasks have been selected based on their usage by pre-existing multilingual pre-trained models, for simplicity.
Reproducibility
We produce fully open-sourced, maintained and easy-to-use fine-tuning scripts for each task as shown under Fine-tuning Example. XTREME-S encourages submissions that leverage publicly available speech and text datasets. Users should detail which data they use. In general, we encourage settings that can be reproduced by the community, but also encourage the exploration of new frontiers for speech representation learning.
Fine-tuning and Evaluation Example
We provide a fine-tuning script under research-projects/xtreme-s. The fine-tuning script is written in PyTorch and allows one to fine-tune and evaluate any Model Database model on XTREME-S. The example script is actively maintained by @anton-l and @patrickvonplaten. Feel free to reach out via issues or pull requests on GitHub if you have any questions.
Leaderboards
The leaderboard for the XTREME-S benchmark can be found at this address (TODO(PVP)).
Supported Tasks
Note that the supported tasks focus particularly on the linguistic aspects of speech, while nonlinguistic/paralinguistic aspects of speech relevant to e.g. speech synthesis or voice conversion are not evaluated.
1. Speech Recognition (ASR)
We include three speech recognition datasets: FLEURS-ASR, MLS and VoxPopuli (optionally BABEL). Multilingual fine-tuning is used for these three datasets.
FLEURS-ASR
FLEURS-ASR is the speech version of the FLORES machine translation benchmark, covering 2000 n-way parallel sentences in n=102 languages.
# Example: load the Afrikaans FLEURS configuration of XTREME-S for ASR fine-tuning.
from datasets import load_dataset
fleurs_asr = load_dataset("google/xtreme_s", "fleurs.af_za") # for Afrikaans
# to download all data for multilingual fine-tuning, uncomment the following line
# fleurs_asr = load_dataset("google/xtreme_s", "fleurs.all")
# inspect the structure of the loaded dataset
print(fleurs_asr)
# load audio sample on the fly
audio_input = fleurs_asr["train"][0]["audio"] # first decoded audio sample
transcription = fleurs_asr["train"][0]["transcription"] # first transcription
# use `audio_input` and `transcription` to fine-tune your model for ASR
# for analyses, look up the language group of a sample
all_language_groups = fleurs_asr["train"].features["lang_group_id"].names # names of all language groups
lang_group_id = fleurs_asr["train"][0]["lang_group_id"] # index into `all_language_groups` for the first sample
all_language_groups[lang_group_id] # language group name of the first sample
Multilingual LibriSpeech (MLS)
MLS is a large multilingual corpus derived from read audiobooks from LibriVox and consists of 8 languages. For this challenge the training data is limited to 10-hours splits.
# Example: load the Polish MLS configuration of XTREME-S for ASR fine-tuning.
from datasets import load_dataset
mls = load_dataset("google/xtreme_s", "mls.pl") # for Polish
# to download all data for multilingual fine-tuning, uncomment the following line
# mls = load_dataset("google/xtreme_s", "mls.all")
# inspect the structure of the loaded dataset
print(mls)
# load audio sample on the fly
audio_input = mls["train"][0]["audio"] # first decoded audio sample
transcription = mls["train"][0]["transcription"] # first transcription
# use `audio_input` and `transcription` to fine-tune your model for ASR
VoxPopuli
VoxPopuli is a large-scale multilingual speech corpus for representation learning and semi-supervised learning, from which we use the speech recognition dataset. The raw data is collected from 2009-2020 European Parliament event recordings. We acknowledge the European Parliament for creating and sharing these materials.
Note: loading any VoxPopuli configuration requires downloading the whole dataset (about 100GB), since the languages are entangled with each other - it may not be worth testing here due to the size.
# Example: load the Romanian VoxPopuli configuration of XTREME-S for ASR fine-tuning.
from datasets import load_dataset
voxpopuli = load_dataset("google/xtreme_s", "voxpopuli.ro") # for Romanian
# to download all data for multilingual fine-tuning, uncomment the following line
# voxpopuli = load_dataset("google/xtreme_s", "voxpopuli.all")
# inspect the structure of the loaded dataset
print(voxpopuli)
# load audio sample on the fly
audio_input = voxpopuli["train"][0]["audio"] # first decoded audio sample
transcription = voxpopuli["train"][0]["transcription"] # first transcription
# use `audio_input` and `transcription` to fine-tune your model for ASR
(Optionally) BABEL
BABEL from IARPA is a conversational speech recognition dataset in low-resource languages. First, download LDC2016S06, LDC2016S12, LDC2017S08, LDC2017S05 and LDC2016S13. BABEL is the only dataset in our benchmark that is less easily accessible, so you will need to sign in to get access to it on LDC. Although not officially part of the XTREME-S ASR datasets, BABEL is often used for evaluating speech representations on a difficult domain (phone conversations).
# Example: request the "babel.as" configuration without a local copy of the data.
from datasets import load_dataset
# no `data_dir` is given here, so this call is expected to fail with instructions on how to download BABEL
babel = load_dataset("google/xtreme_s", "babel.as")
The above command is expected to fail with an informative error message explaining how to download BABEL.
The following should work:
# Example: load the "babel.as" configuration from a manually downloaded LDC archive.
from datasets import load_dataset
# `data_dir` points at the local BABEL zip file (replace the placeholder path with your own)
babel = load_dataset("google/xtreme_s", "babel.as", data_dir="/path/to/IARPA_BABEL_OP1_102_LDC2016S06.zip")
# inspect the structure of the loaded dataset
print(babel)
# load audio sample on the fly
audio_input = babel["train"][0]["audio"] # first decoded audio sample
transcription = babel["train"][0]["transcription"] # first transcription
# use `audio_input` and `transcription` to fine-tune your model for ASR
2. Speech Translation (ST)
We include the CoVoST-2 dataset for automatic speech translation.
CoVoST-2
The CoVoST-2 benchmark has become a commonly used dataset for evaluating automatic speech translation. It covers language pairs from English into 15 languages, as well as 21 languages into English. We use only the "X->En" direction to evaluate cross-lingual representations. The amount of supervision varies greatly in this setting, from one hour for Japanese->English to 180 hours for French->English. This makes pretraining particularly useful to enable such few-shot learning. We enforce multilingual fine-tuning for simplicity. Results are split into high/med/low-resource language pairs as explained in the [paper (TODO(PVP))].
# Example: load the Indonesian-to-English CoVoST-2 configuration of XTREME-S for speech translation.
from datasets import load_dataset
covost_2 = load_dataset("google/xtreme_s", "covost2.id.en") # for Indonesian to English
# to download all data for multilingual fine-tuning, uncomment the following line
# covost_2 = load_dataset("google/xtreme_s", "covost2.all")
# inspect the structure of the loaded dataset
print(covost_2)
# load audio sample on the fly
audio_input = covost_2["train"][0]["audio"] # first decoded audio sample
transcription = covost_2["train"][0]["transcription"] # first transcription
translation = covost_2["train"][0]["translation"] # first translation
# use `audio_input` and `translation` to fine-tune your model for automatic speech translation (AST)
3. Speech Classification
We include two multilingual speech classification datasets: FLEURS-LangID and Minds-14.
Language Identification - FLEURS-LangID
LangID can often be a domain classification, but in the case of FLEURS-LangID, recordings are done in a similar setting across languages and the utterances correspond to n-way parallel sentences, in the exact same domain, making this task particularly relevant for evaluating LangID. The setting is simple: FLEURS-LangID is split into train/valid/test for each language. We simply create a single train/valid/test for LangID by merging all.
# Example: load all FLEURS languages of XTREME-S for language identification.
from datasets import load_dataset
fleurs_langID = load_dataset("google/xtreme_s", "fleurs.all") # to download all data
# inspect the structure of the loaded dataset
print(fleurs_langID)
# load audio sample on the fly
audio_input = fleurs_langID["train"][0]["audio"] # first decoded audio sample
language_class = fleurs_langID["train"][0]["lang_id"] # first id class
language = fleurs_langID["train"].features["lang_id"].names[language_class] # class name looked up from the id
# use `audio_input` and `language_class` to fine-tune your model for audio classification
Intent classification - Minds-14
Minds-14 is an intent classification dataset made from e-banking speech datasets in 14 languages, with 14 intent labels. We impose a single multilingual fine-tuning to increase the size of the train and test sets and reduce the variance associated with the small size of the dataset per language.
# Example: load the French Minds-14 configuration of XTREME-S for intent classification.
from datasets import load_dataset
minds_14 = load_dataset("google/xtreme_s", "minds14.fr-FR") # for French
# to download all data for multilingual fine-tuning, uncomment the following line
# minds_14 = load_dataset("google/xtreme_s", "minds14.all")
# inspect the structure of the loaded dataset
print(minds_14)
# load audio sample on the fly
audio_input = minds_14["train"][0]["audio"] # first decoded audio sample
intent_class = minds_14["train"][0]["intent_class"] # first intent class id
intent = minds_14["train"].features["intent_class"].names[intent_class] # intent name looked up from the id
# use `audio_input` and `intent_class` to fine-tune your model for audio classification
4. (Optionally) Speech Retrieval
We optionally include one speech retrieval dataset: FLEURS-Retrieval as explained in the FLEURS paper.
FLEURS-Retrieval
FLEURS-Retrieval provides n-way parallel speech and text data. Similar to how XTREME for text leverages Tatoeba to evaluate bitext mining, a.k.a. sentence translation retrieval, we use FLEURS-Retrieval to evaluate the quality of fixed-size representations of speech utterances. Our goal is to incentivize the creation of fixed-size speech encoders for speech retrieval. The system has to retrieve the English "key" utterance corresponding to the speech translation of "queries" in 15 languages. Results have to be reported on the test sets of FLEURS-Retrieval whose utterances are used as queries (and keys for English). We augment the English keys with a large number of utterances to make the task more difficult.
# Example: load the Afrikaans FLEURS configuration of XTREME-S and pick samples for retrieval training.
from datasets import load_dataset
fleurs_retrieval = load_dataset("google/xtreme_s", "fleurs.af_za") # for Afrikaans
# to download all data for multilingual fine-tuning, uncomment the following line
# fleurs_retrieval = load_dataset("google/xtreme_s", "fleurs.all")
# inspect the structure of the loaded dataset
print(fleurs_retrieval)
# load audio sample on the fly
audio_input = fleurs_retrieval["train"][0]["audio"] # decoded audio sample
text_sample_pos = fleurs_retrieval["train"][0]["transcription"] # positive text sample (transcription of the same example)
text_sample_neg = fleurs_retrieval["train"][1:20]["transcription"] # negative text samples (transcriptions of examples 1-19)
# use `audio_input`, `text_sample_pos`, and `text_sample_neg` to fine-tune your model for retrieval
Users can leverage the training (and dev) sets of FLEURS-Retrieval with a ranking loss to build better cross-lingual fixed-size representations of speech.
Dataset Structure
The XTREME-S benchmark is composed of the following datasets:
- FLEURS
- Multilingual Librispeech (MLS)
Note that for MLS, XTREME-S uses `path` instead of `file` and `transcription` instead of `text`.
- Voxpopuli
- Minds14
- Covost2
Note that for Covost2, XTREME-S uses `path` instead of `file` and `transcription` instead of `sentence`.
- BABEL
Please click on the link of the dataset cards to get more information about its dataset structure.
Dataset Creation
The XTREME-S benchmark is composed of the following datasets:
Please visit the corresponding dataset cards to get more information about the source data.
Considerations for Using the Data
Social Impact of Dataset
This dataset is meant to encourage the development of speech technology in many more languages of the world. One of the goals is to give equal access to technologies like speech recognition or speech translation to everyone, meaning better dubbing or better access to content from the internet (like podcasts, streaming or videos).
Discussion of Biases
Most datasets have a fair distribution of utterances across genders (e.g. the newly introduced FLEURS dataset). While many languages are covered from various regions of the world, the benchmark misses many languages that are all equally important. We believe technology built through XTREME-S should generalize to all languages.
Other Known Limitations
The benchmark has a particular focus on read-speech because common evaluation benchmarks like CoVoST-2 or LibriSpeech evaluate on this type of speech. There is sometimes a known mismatch between performance obtained in a read-speech setting and a more noisy setting (in production for instance). Given the big progress that remains to be made on many languages, we believe better performance on XTREME-S should still correlate well with actual progress made for speech understanding.
Additional Information
All datasets are licensed under the Creative Commons license (CC-BY).
Citation Information
XTREME-S
@article{conneau2022xtreme,
  author     = {Conneau, Alexis and Bapna, Ankur and Zhang, Yu and Ma, Min and von Platen, Patrick and Lozhkov, Anton and Cherry, Colin and Jia, Ye and Rivera, Clara and Kale, Mihir and others},
  title      = {{XTREME-S}: Evaluating Cross-lingual Speech Representations},
  journal    = {CoRR},
  volume     = {abs/2203.10752},
  year       = {2022},
  eprint     = {2203.10752},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2203.10752}
}
MLS
@article{Pratap2020MLSAL,
  author     = {Pratap, Vineel and Xu, Qiantong and Sriram, Anuroop and Synnaeve, Gabriel and Collobert, Ronan},
  title      = {{MLS}: A Large-Scale Multilingual Dataset for Speech Research},
  journal    = {ArXiv},
  volume     = {abs/2012.03411},
  year       = {2020},
  eprint     = {2012.03411},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2012.03411}
}
VoxPopuli
@article{wang2021voxpopuli,
  author     = {Wang, Changhan and Riviere, Morgane and Lee, Ann and Wu, Anne and Talnikar, Chaitanya and Haziza, Daniel and Williamson, Mary and Pino, Juan and Dupoux, Emmanuel},
  title      = {{VoxPopuli}: A Large-Scale Multilingual Speech Corpus for Representation Learning, Semi-Supervised Learning and Interpretation},
  journal    = {CoRR},
  volume     = {abs/2101.00390},
  year       = {2021},
  eprint     = {2101.00390},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2101.00390}
}
CoVoST 2
@article{DBLP:journals/corr/abs-2007-10310,
  author     = {Wang, Changhan and
                Wu, Anne and
                Pino, Juan Miguel},
  title      = {{CoVoST} 2: {A} Massively Multilingual Speech-to-Text Translation Corpus},
  journal    = {CoRR},
  volume     = {abs/2007.10310},
  year       = {2020},
  url        = {https://arxiv.org/abs/2007.10310},
  eprinttype = {arXiv},
  eprint     = {2007.10310},
  timestamp  = {Thu, 12 Aug 2021 15:37:06 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2007-10310.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
Minds14
@article{gerz2021multilingual,
  author     = {Gerz, Daniela and Su, Pei-Hao and Kusztos, Razvan and Mondal, Avishek and Lis, Micha{\l} and Singhal, Eshan and Mrk{\v{s}}i{\'c}, Nikola and Wen, Tsung-Hsien and Vuli{\'c}, Ivan},
  title      = {Multilingual and Cross-Lingual Intent Detection from Spoken Data},
  journal    = {CoRR},
  volume     = {abs/2104.08524},
  year       = {2021},
  eprint     = {2104.08524},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2104.08524}
}
Contributions
Thanks to @patrickvonplaten, @anton-l, @aconneau for adding this dataset
- Downloads last month
- 12,176