Datasets:
file
string
| is_valid
bool
| language
class label
1 classes
| speaker_id
string
| gender
class label
4 classes
| keyword
string
| audio
audio
|
---|---|---|---|---|---|---|
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آخذ" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آخذ" | |
null | true | 0
(ar) | "61ca692e975b8e4882cdfcb40d756ab2b8e9eeeb6b9fb978d22ca69e65ce74fa6b3dc34404e81ef72694eb244410e05e2699d0ae64830f61f691920ac231255a" | 3
(NAN) | "آخذ" | |
null | true | 0
(ar) | "c6bb8395b07b2deab4bd8bbe259f5a9424b1c3de39cafb0233132a78eab274f864fadb1c0b72e1c073444cba56ccac5b710900f44c702bbe1ec38db32cb4f084" | 0
(MALE) | "آخذ" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "آخر" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "آخر" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "آخر" | |
null | true | 0
(ar) | "ffdc4c1679b3e7fb72a11d2faf84cca7c87122d95198158193e272e9710875d0c8e6e10f29c8c121450f447686b6347409592bd5c362dce0b0cc5bbe1fec9d00" | 1
(FEMALE) | "آخر" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "0512779dbc718e90b827551544de65a0f94c352e2d9325c9c227937bd5c518f6093608ec7006f04babbc83771755f273e7b88952af0359446c56439d40f59e16" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "0512779dbc718e90b827551544de65a0f94c352e2d9325c9c227937bd5c518f6093608ec7006f04babbc83771755f273e7b88952af0359446c56439d40f59e16" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "f38eaaf535a8b01ee2fc6a673f91499fa59a508a3dbdda0372c1cd140d46fc2b070aba7074324e8ea83526e4d28924a00393220fd37526d82bcc81c0cde5bafd" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "6cbfb52b5cd8ca0022cca1cf6df41a1d40c53601ac0fa6a96a48d683f2a53098fef94d004fcc38d66e96190f75d915d40c679d7c6736e301d97afe63698352a7" | 1
(FEMALE) | "آخر" | |
null | true | 0
(ar) | "61ca692e975b8e4882cdfcb40d756ab2b8e9eeeb6b9fb978d22ca69e65ce74fa6b3dc34404e81ef72694eb244410e05e2699d0ae64830f61f691920ac231255a" | 3
(NAN) | "آخر" | |
null | true | 0
(ar) | "848bf4948fe5911a0dd9d57aeb694b2756fc065e941454739568a676c19cb9a0897c2768dc3fa57fac2d18482c99cc5841b8e02e71d85a87cc4d0f849b88bd94" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "7a23b4280f7e69fc80a21d16b8f3d2cf184325c9a58dbeefd5566a962f5bfa673c6e5a6b70a2f21c6b946545b2880dcc08e0dbd96a9976a3a15cdfb2fde48f5c" | 1
(FEMALE) | "آخر" | |
null | true | 0
(ar) | "5af571c2292bd0e1187d7a413805b92b6c0c71ee785868dffd27d70b1c08fb720c8a35bcbf773f788354e7d0f135af8b1523c9d69dd666ea8ed9672dfd7dd3c9" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "7d36cb936a59b0ef45b8e08a7ba882f1e1c92b201c96caf17ed6274b360f865063d4d3b0ea95cbbf67451c592fcd5cfe89666d8a542dd5b03b07123192bfa9a9" | 0
(MALE) | "آخر" | |
null | true | 0
(ar) | "a03a946ee0bfa3c6f35cfa4ba7566da913ec3b828d8feb74465725873a72a5b78c7493b0f9068bcc66238d29ccdbfb2ab63b3d85bc3e65b9628b7b863ad781a5" | 0
(MALE) | "آذار" | |
null | true | 0
(ar) | "4be94c5f185044015844b2fd6a4221f18bfaf2ba15209d9adfd0aafcfea3e019c4a7d424f4b725b2bc7ac65af0a73a3073b2738a0c5ffe3c960dbd9daeed6432" | 0
(MALE) | "آذار" | |
null | true | 0
(ar) | "7ec1f7cd677a0afeffb58d131245187290f6862e84e19d016560aad71082fef461717c1811a36485706d42323d5ff54bb89eac144e1f0ed6b5669b2c469930dd" | 0
(MALE) | "آذار" | |
null | true | 0
(ar) | "c13109959f4fee1a0e3c72f001b62a686b45656148dba0e1f271f3d5c12345e127eed266a112ca121e74ae38d07782b456ad9b9fc585dd391f44756b12637372" | 3
(NAN) | "آسف" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آسف" | |
null | true | 0
(ar) | "f38eaaf535a8b01ee2fc6a673f91499fa59a508a3dbdda0372c1cd140d46fc2b070aba7074324e8ea83526e4d28924a00393220fd37526d82bcc81c0cde5bafd" | 0
(MALE) | "آسف" | |
null | true | 0
(ar) | "24d48acd09b057867118d9f1558fd2c6045be3b39a4ad535b6ca8e790236b0d066361070524a87734186ce152874a8b8670d893f477f95f6c596acdb8b96224f" | 3
(NAN) | "آسف" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آكل" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آكل" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آكل" | |
null | true | 0
(ar) | "f38eaaf535a8b01ee2fc6a673f91499fa59a508a3dbdda0372c1cd140d46fc2b070aba7074324e8ea83526e4d28924a00393220fd37526d82bcc81c0cde5bafd" | 0
(MALE) | "آكل" | |
null | true | 0
(ar) | "a4a76115535203d52f6ffd146fdec48a0363deb9e05628e304d9d7ae51fd8e2295344498506a1c20ef7ec18c1e6c2c2c5e570c4057040e2319ae18c0d170fabe" | 3
(NAN) | "آكل" | |
null | true | 0
(ar) | "c6bb8395b07b2deab4bd8bbe259f5a9424b1c3de39cafb0233132a78eab274f864fadb1c0b72e1c073444cba56ccac5b710900f44c702bbe1ec38db32cb4f084" | 0
(MALE) | "آكل" | |
null | true | 0
(ar) | "c6bb8395b07b2deab4bd8bbe259f5a9424b1c3de39cafb0233132a78eab274f864fadb1c0b72e1c073444cba56ccac5b710900f44c702bbe1ec38db32cb4f084" | 0
(MALE) | "آكل" | |
null | true | 0
(ar) | "7ec1f7cd677a0afeffb58d131245187290f6862e84e19d016560aad71082fef461717c1811a36485706d42323d5ff54bb89eac144e1f0ed6b5669b2c469930dd" | 0
(MALE) | "آكل" | |
null | true | 0
(ar) | "e32d20173e5fdf1118ad2120d87d816ab9cd454cccf78cb3d7ee4392eb2ce6330c49d74412fb2ddb8aa878c3040e76231d5e5520ce1df84a74b448c5f60d42a4" | 1
(FEMALE) | "آلة" | |
null | true | 0
(ar) | "7ec1f7cd677a0afeffb58d131245187290f6862e84e19d016560aad71082fef461717c1811a36485706d42323d5ff54bb89eac144e1f0ed6b5669b2c469930dd" | 0
(MALE) | "آلة" | |
null | true | 0
(ar) | "4ad03efa6ea483f6118c02befd8ead602a78b269295e9f57f9cbb3ec92c05aff6b6c85d9f218fae9af3141344de70bc1c10d168d4ea4197551f82b2a6de787c3" | 3
(NAN) | "آلة" | |
null | true | 0
(ar) | "53443952e7722c4537513d1ceb908ae34b53a5a187fc814cdfea4a58b06c285e250c6cf7d42aaf85b208b10859376822758ac0d2fbd2a30e33b2b0da62a16a95" | 0
(MALE) | "آلة" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آلي" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آلي" | |
null | true | 0
(ar) | "4613605ed3db2629289f866168978031dba401b5ccd9ca4f58a1ee35c43ef75866da2fe93e1d1dbfbe5506e99edd14e58b9708e1cf6931bdf678d43c49b4ef24" | 1
(FEMALE) | "آلي" | |
null | true | 0
(ar) | "f38eaaf535a8b01ee2fc6a673f91499fa59a508a3dbdda0372c1cd140d46fc2b070aba7074324e8ea83526e4d28924a00393220fd37526d82bcc81c0cde5bafd" | 0
(MALE) | "آلي" | |
null | true | 0
(ar) | "c6bb8395b07b2deab4bd8bbe259f5a9424b1c3de39cafb0233132a78eab274f864fadb1c0b72e1c073444cba56ccac5b710900f44c702bbe1ec38db32cb4f084" | 0
(MALE) | "آلي" | |
null | true | 0
(ar) | "f3941fecaac2dc0dd1417319514d70d08ad3757f26b8e02440f8eb9bd8858d6be9436079684cd3045d40ccb1ebf7c8cd89f8c8381828cb08744a7cadad08b458" | 3
(NAN) | "آلي" | |
null | true | 0
(ar) | "5928f4424623201f2c5a970e9785a789bfb5384b13fa112e4e8efe960dd22e0ae934451b2a8489acf65068b50dffbfa58a74a1a01c7064eac86718bac1ad0acc" | 0
(MALE) | "آلي" | |
null | true | 0
(ar) | "5da04449ee1a61de4a1a445db7301c576a7900fc0ba741184751ff057236b40498231ed7c3200b599b45ce59fe22f31385c1843b6aae6e22f258839f80b00cec" | 0
(MALE) | "آلي" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آمل" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "آمل" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "آمل" | |
null | true | 0
(ar) | "6de14f28f77ba488714e21cd47dde88930a6922232b1cb9c9df707ef137e279dba17d53615d9aacfb4c365e3235b06d554ffde39425434b4033c0070d3a7ba11" | 1
(FEMALE) | "آمل" | |
null | true | 0
(ar) | "61b1171f938d69d18865736060f77e2102d9fef891eabffc356f28a643e12bca280f95b918aa536e23b97f58d2b1d25c63bc5a2811191f590bc03798c3511615" | 3
(NAN) | "آمل" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "آمل" | |
null | true | 0
(ar) | "2911279ce52585bf6ccb58239e0bdb1a4607f0d5a274dfc9d053802af47d885864f80a770b58e7b364b36ac4aab17c325d99b89983fce527bffc136060e21896" | 0
(MALE) | "آمل" | |
null | true | 0
(ar) | "37a4fb88b9512b13804672695f54854e03de15fe0d151e64b0f7a123d9e71c60c2c08293bad2cb94ac5b70dce5dc48427fa36301c3e4ee3df98bcf4b54a75d83" | 0
(MALE) | "آمل" | |
null | true | 0
(ar) | "24d48acd09b057867118d9f1558fd2c6045be3b39a4ad535b6ca8e790236b0d066361070524a87734186ce152874a8b8670d893f477f95f6c596acdb8b96224f" | 3
(NAN) | "آمل" | |
null | true | 0
(ar) | "7ec1f7cd677a0afeffb58d131245187290f6862e84e19d016560aad71082fef461717c1811a36485706d42323d5ff54bb89eac144e1f0ed6b5669b2c469930dd" | 0
(MALE) | "آمل" | |
null | true | 0
(ar) | "3ed0ada6e07c029a8e33c022429b902ecda77dbb159e19fc06af5af7efb590081b2105a87c4eecf0a68785a396bcd4590fcc175ad7e995a94182ff6b13d26eb7" | 0
(MALE) | "أباه" | |
null | true | 0
(ar) | "e73ecdb916011a6830041d723a43be97b0e68ae1ab991afae566ae6653e4565a618ccea36fea5db176f720ca30f7c40c66f4ee0168adabbb789576da295bb76f" | 1
(FEMALE) | "أباه" | |
null | true | 0
(ar) | "f38eaaf535a8b01ee2fc6a673f91499fa59a508a3dbdda0372c1cd140d46fc2b070aba7074324e8ea83526e4d28924a00393220fd37526d82bcc81c0cde5bafd" | 0
(MALE) | "أباه" | |
null | true | 0
(ar) | "61ca692e975b8e4882cdfcb40d756ab2b8e9eeeb6b9fb978d22ca69e65ce74fa6b3dc34404e81ef72694eb244410e05e2699d0ae64830f61f691920ac231255a" | 3
(NAN) | "أباه" | |
null | true | 0
(ar) | "c6bb8395b07b2deab4bd8bbe259f5a9424b1c3de39cafb0233132a78eab274f864fadb1c0b72e1c073444cba56ccac5b710900f44c702bbe1ec38db32cb4f084" | 0
(MALE) | "أباه" | |
null | true | 0
(ar) | "521c4f5e97ffdec33647e0e6c472c2ac620abc14ee8d23558410eecd6ca88f661130c3c809d600fd2862781ce492747206a78dab1c39499fd567fa9d61511a8d" | 0
(MALE) | "أباه" | |
null | true | 0
(ar) | "85377c90ab1cf11593d17c553b5946e5f5d26a0882fbc9c30264fb71d832575ef91e2a2a6268e5bcd3256eba2135bbe2d1a902416f2c769ea469a61f513ae4db" | 0
(MALE) | "أبحث" | |
null | true | 0
(ar) | "e73ecdb916011a6830041d723a43be97b0e68ae1ab991afae566ae6653e4565a618ccea36fea5db176f720ca30f7c40c66f4ee0168adabbb789576da295bb76f" | 1
(FEMALE) | "أبحث" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "أبحث" | |
null | true | 0
(ar) | "61ca692e975b8e4882cdfcb40d756ab2b8e9eeeb6b9fb978d22ca69e65ce74fa6b3dc34404e81ef72694eb244410e05e2699d0ae64830f61f691920ac231255a" | 3
(NAN) | "أبحث" | |
null | true | 0
(ar) | "61ca692e975b8e4882cdfcb40d756ab2b8e9eeeb6b9fb978d22ca69e65ce74fa6b3dc34404e81ef72694eb244410e05e2699d0ae64830f61f691920ac231255a" | 3
(NAN) | "أبحث" | |
null | true | 0
(ar) | "8dc87fcfe937bc74f69ba0ac9f53b0d6a77cb579decfa96a5a16a4c2be9b147cdc7f87925ff197555a18d968eb5bde38f403beeefa0a5abc5bbe3a52ad78472c" | 3
(NAN) | "أبحث" | |
null | true | 0
(ar) | "38148ed668fa5985aed7bfce74e56a55d1c0c88b4e54e4a1c038887f8802055e755411b76e2ab77cf47adf1a95c8c7cf22f4b3f1c720426c96c914b5d2ccd92c" | 0
(MALE) | "أبعد" | |
null | true | 0
(ar) | "848bf4948fe5911a0dd9d57aeb694b2756fc065e941454739568a676c19cb9a0897c2768dc3fa57fac2d18482c99cc5841b8e02e71d85a87cc4d0f849b88bd94" | 0
(MALE) | "أبعد" | |
null | true | 0
(ar) | "7ec1f7cd677a0afeffb58d131245187290f6862e84e19d016560aad71082fef461717c1811a36485706d42323d5ff54bb89eac144e1f0ed6b5669b2c469930dd" | 0
(MALE) | "أبعد" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "52f0b11b4571a7c670fe631149eba1187dca404af883f4f56ee81e1c725270542bbecb7cf4ec0c1a5c8bcc8efa95c1bde06f08ec3077d23896f57b6e3246589e" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "52f0b11b4571a7c670fe631149eba1187dca404af883f4f56ee81e1c725270542bbecb7cf4ec0c1a5c8bcc8efa95c1bde06f08ec3077d23896f57b6e3246589e" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "52f0b11b4571a7c670fe631149eba1187dca404af883f4f56ee81e1c725270542bbecb7cf4ec0c1a5c8bcc8efa95c1bde06f08ec3077d23896f57b6e3246589e" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "6fd885b5ae246c0f125138cdd66534a8d6670745bcf2690c8bdd9e4af4a5ee370d325f7725b74bed2670d22b1774bbbee7bef6172d33b7cb431b2b1e13eaca3d" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "510f749dd53e107df7ad1182fad0cdd082e0b2ed7c9e038ce5c60a9db17220e6467fdb5070bd2a3672b75fdf94de578829030f8ecdbdfa295181f68cf350f262" | 0
(MALE) | "أبي" | |
null | true | 0
(ar) | "f5332db8ce939969b03ff6f9ca6dc548cd0bafcd3da7a06131547f24b0c1ce72a63e3124e6a6801678287c33a7cdd546b78523ee69689f9418d5448809febfc1" | 3
(NAN) | "أبي" | |
null | true | 0
(ar) | "f5332db8ce939969b03ff6f9ca6dc548cd0bafcd3da7a06131547f24b0c1ce72a63e3124e6a6801678287c33a7cdd546b78523ee69689f9418d5448809febfc1" | 3
(NAN) | "أبي" | |
null | true | 0
(ar) | "3b4ef4aaaaa79f6d78863b0e2f327ff4438a9d383f8a8339743993dccaa090ae8c9d113cc491e3ed6a08ae52e382495ef853364b68accd71ff34671cc7ccc443" | 3
(NAN) | "أبي" | |
null | true | 0
(ar) | "f0aac0a86b221a86a4859b6a2ec12cbfb636b829dc49a1805dd3dce9c618ebce63b5e67bd309c8f2b8fed4beb029730a346e15a5af1118315c942e98a5f4ea90" | 3
(NAN) | "أبي" | |
null | true | 0
(ar) | "ba46286916392e26090102e478e2f7c9c0e66fb80c6deb6d819b723d23784cc0426b085484fe09a43e8106266fdfcb2e8635a2d3bcd3b53990318c9ee3d1f7d2" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "ba46286916392e26090102e478e2f7c9c0e66fb80c6deb6d819b723d23784cc0426b085484fe09a43e8106266fdfcb2e8635a2d3bcd3b53990318c9ee3d1f7d2" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "510f749dd53e107df7ad1182fad0cdd082e0b2ed7c9e038ce5c60a9db17220e6467fdb5070bd2a3672b75fdf94de578829030f8ecdbdfa295181f68cf350f262" | 0
(MALE) | "أبي" | |
null | true | 0
(ar) | "510f749dd53e107df7ad1182fad0cdd082e0b2ed7c9e038ce5c60a9db17220e6467fdb5070bd2a3672b75fdf94de578829030f8ecdbdfa295181f68cf350f262" | 0
(MALE) | "أبي" | |
null | true | 0
(ar) | "b268122bc2894380519220337f90df10ce313013fbb6263ed1ad7d16c6bf9b13c3417fb09f3daa86a3f2f7b75cf3cea90eb81d27a30dab100d6d99356c0b9511" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "b268122bc2894380519220337f90df10ce313013fbb6263ed1ad7d16c6bf9b13c3417fb09f3daa86a3f2f7b75cf3cea90eb81d27a30dab100d6d99356c0b9511" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "b268122bc2894380519220337f90df10ce313013fbb6263ed1ad7d16c6bf9b13c3417fb09f3daa86a3f2f7b75cf3cea90eb81d27a30dab100d6d99356c0b9511" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "b268122bc2894380519220337f90df10ce313013fbb6263ed1ad7d16c6bf9b13c3417fb09f3daa86a3f2f7b75cf3cea90eb81d27a30dab100d6d99356c0b9511" | 1
(FEMALE) | "أبي" | |
null | true | 0
(ar) | "88c494babca045f30c410d3a34452a21ae7e4b35e35fc3f09ae51af764a1f18748ee4ddf85ca38b388d4092095f8fcd0de5ae6701ae5d5ef249c134d4c80357b" | 0
(MALE) | "أبي" | |
null | true | 0
(ar) | "88c494babca045f30c410d3a34452a21ae7e4b35e35fc3f09ae51af764a1f18748ee4ddf85ca38b388d4092095f8fcd0de5ae6701ae5d5ef249c134d4c80357b" | 0
(MALE) | "أبي" | |
null | true | 0
(ar) | "30c7fc5a331d1dcfa95175b4c686e84f742e9ef768cf707d9c6a2568f734584ea6473e799f2b60091497717c379ccf7c883b40e51d3449a9d506fab8c022bb5d" | 2
(OTHER) | "أبي" | |
null | true | 0
(ar) | "3b4ef4aaaaa79f6d78863b0e2f327ff4438a9d383f8a8339743993dccaa090ae8c9d113cc491e3ed6a08ae52e382495ef853364b68accd71ff34671cc7ccc443" | 3
(NAN) | "أبي" | |
null | true | 0
(ar) | "93d5f73a2ab5218e400186a995921d066149a56aa5db4ad1bb93b0ba044c09c56fd5c6adf11c2402aeb0fc94dfa8cbd6de680f6456ad7ab9e314bf3f31de03ae" | 3
(NAN) | "أبي" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "أبي" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "أبي" | |
null | true | 0
(ar) | "e23fcbfdbd4639b11c95fb4c8fb94d4b8c576234203af247a6aa4e230e0247e37085eae1fa5aef4832e68cae29d165e45ebccc16ea830d43827c700877602c72" | 0
(MALE) | "أبي" |
Dataset Card for Multilingual Spoken Words
Dataset Summary
Multilingual Spoken Words Corpus is a large and growing audio dataset of spoken words in 50 languages collectively spoken by over 5 billion people, for academic research and commercial applications in keyword spotting and spoken term search, licensed under CC-BY 4.0. The dataset contains more than 340,000 keywords, totaling 23.4 million 1-second spoken examples (over 6,000 hours). The dataset has many use cases, ranging from voice-enabled consumer devices to call center automation. This dataset is generated by applying forced alignment on crowd-sourced sentence-level audio to produce per-word timing estimates for extraction. All alignments are included in the dataset.
Data is provided in two formats: wav
(16KHz) and opus
(48KHz). Default configurations look like
"{lang}_{format}"
, so to load, for example, Tatar in wav format do:
ds = load_dataset("MLCommons/ml_spoken_words", "tt_wav")
To download multiple languages in a single dataset, pass a list of languages to the languages
argument:
ds = load_dataset("MLCommons/ml_spoken_words", languages=["ar", "tt", "br"])
To download a specific format pass it to the format
argument (default format is wav
):
ds = load_dataset("MLCommons/ml_spoken_words", languages=["ar", "tt", "br"], format="opus")
Note that each time you provide different sets of languages, examples are generated from scratch even if you already provided one or several of them before because custom configurations are created each time (the data is not redownloaded though).
Supported Tasks and Leaderboards
Keyword spotting, Spoken term search
Languages
The dataset is multilingual. To specify several languages to download, pass a list of them to the
languages
argument:
ds = load_dataset("MLCommons/ml_spoken_words", languages=["ar", "tt", "br"])
The dataset contains data for the following languages:
Low-resourced (<10 hours):
- Arabic (0.1G, 7.6h)
- Assamese (0.9M, 0.1h)
- Breton (69M, 5.6h)
- Chuvash (28M, 2.1h)
- Chinese (zh-CN) (42M, 3.1h)
- Dhivehi (0.7M, 0.04h)
- Frisian (0.1G, 9.6h)
- Georgian (20M, 1.4h)
- Guarani (0.7M, 1.3h)
- Greek (84M, 6.7h)
- Hakha Chin (26M, 0.1h)
- Hausa (90M, 1.0h)
- Interlingua (58M, 4.0h)
- Irish (38M, 3.2h)
- Latvian (51M, 4.2h)
- Lithuanian (21M, 0.46h)
- Maltese (88M, 7.3h)
- Oriya (0.7M, 0.1h)
- Romanian (59M, 4.5h)
- Sakha (42M, 3.3h)
- Slovenian (43M, 3.0h)
- Slovak (31M, 1.9h)
- Sursilvan (61M, 4.8h)
- Tamil (8.8M, 0.6h)
- Vallader (14M, 1.2h)
- Vietnamese (1.2M, 0.1h)
Medium-resourced (>10 & <100 hours):
- Czech (0.3G, 24h)
- Dutch (0.8G, 70h)
- Estonian (0.2G, 19h)
- Esperanto (1.3G, 77h)
- Indonesian (0.1G, 11h)
- Kyrgyz (0.1G, 12h)
- Mongolian (0.1G, 12h)
- Portuguese (0.7G, 58h)
- Swedish (0.1G, 12h)
- Tatar (4G, 30h)
- Turkish (1.3G, 29h)
- Ukrainian (0.2G, 18h)
High-resourced (>100 hours):
- Basque (1.7G, 118h)
- Catalan (8.7G, 615h)
- English (26G, 1957h)
- French (9.3G, 754h)
- German (14G, 1083h)
- Italian (2.2G, 155h)
- Kinyarwanda (6.1G, 422h)
- Persian (4.5G, 327h)
- Polish (1.8G, 130h)
- Russian (2.1G, 137h)
- Spanish (4.9G, 349h)
- Welsh (4.5G, 108h)
Dataset Structure
Data Instances
{'file': 'абзар_common_voice_tt_17737010.opus',
'is_valid': True,
'language': 0,
'speaker_id': '687025afd5ce033048472754c8d2cb1cf8a617e469866bbdb3746e2bb2194202094a715906f91feb1c546893a5d835347f4869e7def2e360ace6616fb4340e38',
'gender': 0,
'keyword': 'абзар',
'audio': {'path': 'абзар_common_voice_tt_17737010.opus',
'array': array([2.03458695e-34, 2.03458695e-34, 2.03458695e-34, ...,
2.03458695e-34, 2.03458695e-34, 2.03458695e-34]),
'sampling_rate': 48000}}
Data Fields
- file: string, relative audio path inside the archive
- is_valid: if a sample is valid
- language: language of an instance. Makes sense only when providing multiple languages to the
dataset loader (for example,
load_dataset("MLCommons/ml_spoken_words", languages=["ar", "tt"])
)
- speaker_id: unique id of a speaker. Can be "NA" if an instance is invalid
- gender: speaker gender. Can be one of
["MALE", "FEMALE", "OTHER", "NAN"]
- keyword: word spoken in a current sample
- audio: a dictionary containing the relative path to the audio file,
the decoded audio array, and the sampling rate.
Note that when accessing the audio column:
dataset[0]["audio"]
the audio file is automatically decoded and resampled to dataset.features["audio"].sampling_rate
. Decoding and resampling of a large number of audio files might take a significant amount of time. Thus, it is important to first query the sample index before the "audio" column, i.e. dataset[0]["audio"]
should always be preferred over dataset["audio"][0].
Data Splits
The data for each language is split into train / validation / test parts.
Dataset Creation
Curation Rationale
[More Information Needed]
Source Data
Initial Data Collection and Normalization
The data comes from the Common Voice dataset.
Who are the source language producers?
[More Information Needed]
Annotations
Annotation process
[More Information Needed]
Who are the annotators?
[More Information Needed]
Personal and Sensitive Information
The dataset consists of people who have donated their voice online. You agree to not attempt to determine the identity of speakers.
Considerations for Using the Data
Social Impact of Dataset
[More Information Needed]
Discussion of Biases
[More Information Needed]
Other Known Limitations
[More Information Needed]
Additional Information
Dataset Curators
[More Information Needed]
Licensing Information
The dataset is licensed under CC-BY 4.0 and can be used for academic research and commercial applications in keyword spotting and spoken term search.
Citation Information
@inproceedings{mazumder2021multilingual,
title={Multilingual Spoken Words Corpus},
author={Mazumder, Mark and Chitlangia, Sharad and Banbury, Colby and Kang, Yiping and Ciro, Juan Manuel and Achorn, Keith and Galvez, Daniel and Sabini, Mark and Mattson, Peter and Kanter, David and others},
booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)},
year={2021}
}
Contributions
Thanks to @polinaeterna for adding this dataset.
- Downloads last month
- 549