conversation_id
string
speaker
string
turn_number
int16
0
523
tokens
sequence
transcriptions
sequence
pos_tags_prf
sequence
pos_tags_ud
sequence
"TN001-DR300497-WAI3C"
"A"
0
[ "喂", "遲", "啲", "去", "唔", "去", "旅行", "啊", "?", "你", "老公", "有冇", "平", "機票", "啊", "?" ]
[ "wai3", "ci4", "di1", "heoi3", "m4", "heoi3", "leoi5hang4", "aa3", "VQ6", "nei5", "lou5gung1", "jau5mou5", "peng4", "gei1piu3", "aa3", "VQ6" ]
[ 24, 9, 72, 75, 21, 75, 80, 116, 83, 64, 50, 76, 9, 50, 116, 83 ]
[ 7, 11, 6, 5, 4, 5, 14, 6, 0, 10, 14, 5, 11, 14, 6, 0 ]
"TN001-DR300497-PENG4"
"B"
1
[ "平", "機票", "要", "淡季", "先", "有得", "平", "𡃉", "喎", "。", "而家", "旺", "-", "。" ]
[ "peng4", "gei1piu3", "jiu3", "daam6gwai3", "sin1", "jau5dak1", "peng4", "gaa3", "wo3", "VQ1", "ji4gaa1", "wong6", "VQ2", "VQ1" ]
[ 9, 50, 81, 13, 21, 81, 9, 116, 116, 83, 68, 9, 83, 83 ]
[ 11, 14, 8, 14, 4, 8, 11, 6, 6, 0, 4, 11, 0, 0 ]
"TN001-DR300497-MOU5D"
"A"
2
[ "冇得", "去", "嗱", "。" ]
[ "mou5dak1", "heoi3", "laa4", "VQ1" ]
[ 81, 75, 116, 83 ]
[ 8, 5, 6, 0 ]
"TN001-DR300497-HAA5V"
"B"
3
[ "吓", "?" ]
[ "haa5", "VQ6" ]
[ 24, 83 ]
[ 7, 0 ]
"TN001-DR300497-JI4GA"
"B"
4
[ "而家", "旺季", "。", "通常", "都", "係", "貴", "𡃉", "喎", ",", "啲", "機票", "。" ]
[ "ji4gaa1", "wong6gwai3", "VQ1", "tung1soeng4", "dou1", "hai6", "gwai3", "gaa3", "wo3", "VQ2", "di1", "gei1piu3", "VQ1" ]
[ 68, 13, 83, 21, 21, 75, 9, 116, 116, 83, 62, 50, 83 ]
[ 4, 14, 0, 4, 4, 5, 11, 6, 6, 0, 14, 14, 0 ]
"TN001-DR300497-HAI6M"
"A"
5
[ "係", "咩", "?", "我", "聽", "朋友", "講", "話", "去", ",", "誒", ",", "Orlando", "嗰個", "舊", "-", "嗰個", "迪士尼", "呢", ",", "廿五", "週年", "喎", "。", "好", "抵", "玩", "喎", "。", "直程", "可以", "喺", "裏邊", "嗰啲", "酒店", "住", "𠻹", "喎", ",", "佢", "話", "。" ]
[ "hai6", "me1", "VQ6", "ngo5", "teng1", "pang4jau5", "gong2", "waa6", "heoi3", "VQ2", "e6", "VQ2", "Orlando0", "go2go3", "gau6", "VQ2", "go2go3", "dik6si6nei4", "ne1", "VQ2", "jaa6ng5", "zau1nin4", "wo3", "VQ1", "hou2", "dai2", "waan2", "wo3", "VQ1", "zik6cing4", "ho2ji5", "hai2", "leoi5bin6", "go2di1", "zau2dim3", "zyu6", "tim1", "wo3", "VQ2", "keoi5", "waa6", "VQ1" ]
[ 75, 116, 83, 64, 75, 50, 75, 75, 75, 83, 24, 83, 102, 64, 9, 83, 64, 56, 118, 83, 46, 62, 116, 83, 21, 9, 75, 116, 83, 21, 81, 60, 26, 64, 50, 75, 116, 116, 83, 64, 75, 83 ]
[ 5, 6, 0, 10, 5, 14, 5, 5, 5, 0, 7, 0, 15, 10, 11, 0, 10, 15, 6, 0, 9, 14, 6, 0, 4, 11, 5, 6, 0, 4, 8, 3, 4, 10, 14, 5, 6, 6, 0, 10, 5, 0 ]
"TN001-DR300497-DAAN6"
"B"
6
[ "但", "你哋", "講", "嗰啲", "係", "," ]
[ "daan6", "nei5dei6", "gong2", "go2di1", "hai6", "VQ2" ]
[ 18, 64, 75, 64, 75, 83 ]
[ 2, 10, 5, 10, 5, 0 ]
"TN001-DR300497-GAN1L"
"A"
7
[ "跟", "旅行社", "去", "囖", "。" ]
[ "gan1", "leoi5hang4se5", "heoi3", "lo1", "VQ1" ]
[ 75, 50, 75, 116, 83 ]
[ 5, 14, 5, 6, 0 ]
"TN001-DR300497-HIGH0"
"B"
8
[ "High", "season", "去", "𡃉", "喎", "。" ]
[ "high0", "season0", "heoi3", "gaa3", "wo3", "VQ1" ]
[ 85, 99, 75, 116, 116, 83 ]
[ 11, 14, 5, 6, 6, 0 ]
"TN001-DR300497-CAT1B"
"A"
9
[ "七", "八月", "嗰陣時", "囖", "。", "但係", "都", "幾", "貴", "喎", "。", "都", "要", "成", "萬四", "蚊", ",", "四", "五", "日", "。", "淨係", "去", "Orlando", "嗰度", "玩", "咋", "喎", "。", "冇", "嘢", "做", "。" ]
[ "cat1", "baat3jyut6", "go2zan6si4", "lo1", "VQ1", "daan6hai6", "dou1", "gei2", "gwai3", "wo3", "VQ1", "dou1", "jiu3", "seng4", "maan6sei3", "man1", "VQ2", "sei3", "ng5", "jat6", "VQ1", "zing6hai6", "heoi3", "Orlando0", "go2dou6", "waan2", "zaa3", "wo3", "VQ1", "mou5", "je5", "zou6", "VQ1" ]
[ 46, 68, 64, 116, 83, 18, 21, 21, 9, 116, 83, 21, 81, 46, 46, 62, 83, 46, 46, 62, 83, 21, 75, 102, 64, 75, 116, 116, 83, 76, 50, 75, 83 ]
[ 9, 4, 10, 6, 0, 2, 4, 4, 11, 6, 0, 4, 8, 9, 9, 14, 0, 9, 9, 14, 0, 4, 5, 15, 10, 5, 6, 6, 0, 5, 14, 5, 0 ]
"TN001-DR300497-GAM2M"
"B"
10
[ "噉", "咪", "食", "同", "玩", "咪", "啱", "。", "但係", "我", "-" ]
[ "gam2", "mai6", "sik6", "tung4", "waan2", "mai6", "aam1", "VQ1", "daan6hai6", "ngo5", "VQ2" ]
[ 64, 21, 75, 18, 75, 21, 9, 83, 18, 64, 83 ]
[ 10, 4, 5, 2, 5, 4, 11, 0, 2, 10, 0 ]
"TN001-DR300497-HAI6L"
"A"
11
[ "係", "裏邊", "食", "囖", ",", "裏邊", "冇", "乜嘢", "食", "𡃉", "咋", "喎", "。", "係", "唔係", "啊", "?", "你", "都", "去", "過", "喇", "。" ]
[ "hai6", "leoi5bin6", "sik6", "lo1", "VQ2", "leoi5bin6", "mou5", "mat1je5", "sik6", "gaa3", "zaa3", "wo3", "VQ1", "hai6", "m4hai6", "aa3", "VQ6", "nei5", "dou1", "heoi3", "gwo3", "laa1", "VQ1" ]
[ 75, 26, 75, 116, 83, 26, 76, 64, 75, 116, 116, 116, 83, 75, 75, 116, 83, 64, 21, 75, 72, 116, 83 ]
[ 5, 4, 5, 6, 0, 4, 5, 10, 5, 6, 6, 6, 0, 5, 5, 6, 0, 10, 4, 5, 6, 6, 0 ]
"TN001-DR300497-HAI6S"
"B"
12
[ "係", "食", "嗰啲", "乜嘢", "漢堡包", "," ]
[ "hai6", "sik6", "go2di1", "mat1je5", "hon3bou2baau1", "VQ2" ]
[ 75, 75, 64, 64, 50, 83 ]
[ 5, 5, 10, 10, 14, 0 ]
"TN001-DR300497-MAI6H"
"A"
13
[ "咪", "係", "囖", "。" ]
[ "mai6", "hai6", "lo1", "VQ1" ]
[ 18, 75, 116, 83 ]
[ 2, 5, 6, 0 ]
"TN001-DR300497-SYU4T"
"B"
14
[ "薯條", "𡃉", "咋", "。" ]
[ "syu4tiu2", "gaa3", "zaa3", "VQ1" ]
[ 50, 116, 116, 83 ]
[ 14, 6, 6, 0 ]
"TN001-DR300497-WAAN2"
"A"
15
[ "玩", "嗰啲", ",", "玩", "乜嘢", "啊", "?", "Magic", "Kingdom", "嗰啲", "囖", "。", "係", "唔係", "啊", "?" ]
[ "waan2", "go2di1", "VQ2", "waan2", "mat1je5", "aa3", "VQ6", "Magic0", "Kingdom0", "go2di1", "lo1", "VQ1", "hai6", "m4hai6", "aa3", "VQ6" ]
[ 75, 64, 83, 75, 64, 116, 83, 84, 99, 64, 116, 83, 75, 75, 116, 83 ]
[ 5, 10, 0, 5, 10, 6, 0, 1, 14, 10, 6, 0, 5, 5, 6, 0 ]
"TN001-DR300497-WAAN2"
"B"
16
[ "玩", "就", ",", "多", "嘢", "睇", "多", "嘢", "玩", "。" ]
[ "waan2", "zau6", "VQ2", "do1", "je5", "tai2", "do1", "je5", "waan2", "VQ1" ]
[ 75, 21, 83, 9, 50, 75, 9, 50, 75, 83 ]
[ 5, 4, 0, 11, 14, 5, 11, 14, 5, 0 ]
"TN001-DR300497-BAT1G"
"A"
17
[ "不過", "幾", "日", "都", "好", "悶", "啫", ",", "喺", "晒", "裏邊", "。" ]
[ "bat1gwo3", "gei2", "jat6", "dou1", "hou2", "mun6", "ze1", "VQ2", "hai2", "saai3", "leoi5bin6", "VQ1" ]
[ 18, 46, 62, 21, 21, 9, 116, 83, 75, 72, 26, 83 ]
[ 2, 9, 14, 4, 4, 11, 6, 0, 5, 6, 4, 0 ]
"TN001-DR300497-M4HAI"
"B"
18
[ "唔係", "喎", "。", "佢", "好", "大", "𡃉", "喎", "。", "你", "一", "日", "玩", "一", "個", "場", "喎", ",", "真係", "。" ]
[ "m4hai6", "wo3", "VQ1", "keoi5", "hou2", "daai6", "gaa3", "wo3", "VQ1", "nei5", "jat1", "jat6", "waan2", "jat1", "go3", "coeng4", "wo3", "VQ2", "zan1hai6", "VQ1" ]
[ 75, 116, 83, 64, 21, 9, 116, 116, 83, 64, 46, 62, 75, 46, 62, 50, 116, 83, 21, 83 ]
[ 5, 6, 0, 10, 4, 11, 6, 6, 0, 10, 9, 14, 5, 9, 14, 14, 6, 0, 4, 0 ]
"TN001-DR300497-HOU2#"
"A"
19
[ "好", "○", "𡃉", "。", "玩", "咗", "咁", "多", "日", "。" ]
[ "hou2", "#", "gaa3", "VQ1", "waan2", "zo2", "gam3", "do1", "jat6", "VQ1" ]
[ 21, 2, 116, 83, 75, 72, 21, 9, 62, 83 ]
[ 4, 1, 6, 0, 5, 6, 4, 11, 14, 0 ]
"TN001-DR300497-GAM2M"
"B"
20
[ "噉", "咪", "瞓覺", "囖", "。" ]
[ "gam2", "mai6", "fan3gaau3", "lo1", "VQ1" ]
[ 64, 21, 75, 116, 83 ]
[ 10, 4, 5, 6, 0 ]
"TN001-DR300497-CI1SI"
"A"
21
[ "黐線", "。", "搭", "飛機", "去", "過", "嗰邊", "瞓覺", "呀", "?" ]
[ "ci1sin3", "VQ1", "daap3", "fei1gei1", "heoi3", "gwo3", "go2bin1", "fan3gaau3", "aa4", "VQ6" ]
[ 44, 83, 75, 50, 79, 75, 64, 75, 116, 83 ]
[ 1, 0, 5, 14, 5, 5, 10, 5, 6, 0 ]
"TN001-DR300497-GAM2F"
"B"
22
[ "噉", "放假", "係", "relax", "。", "係", "噉", "𡃉", "喇", "。" ]
[ "gam2", "fong3gaa3", "hai6", "relax0", "VQ1", "hai6", "gam2", "gaa3", "laa1", "VQ1" ]
[ 18, 75, 75, 112, 83, 75, 64, 116, 116, 83 ]
[ 2, 5, 5, 5, 0, 5, 10, 6, 6, 0 ]
"TN001-DR300497-BAT1G"
"A"
23
[ "不過", "都", ",", "不過", "真係", "好", "大", "裏邊", "。", "佢", "直程", "可以", "俾", "餅", "帶", "你", "呢", "。", "借", "俾", "你", "睇", "喎", ",", "睇", "下", "裏面", "有", "啲", "乜嘢", "酒店", "啊", ",", "同埋", "有", "啲", "乜嘢", "嘢", "玩", "嚹", "。", "直程", "可以", "租", ",", "借", "餅", "帶", "俾", "你", "返", "屋企", "睇", "喎", "。" ]
[ "bat1gwo3", "dou1", "VQ2", "bat1gwo3", "zan1hai6", "hou2", "daai6", "leoi5bin6", "VQ1", "keoi5", "zik6cing4", "ho2ji5", "bei2", "beng2", "daai2", "nei5", "ne1", "VQ1", "ze3", "bei2", "nei5", "tai2", "wo3", "VQ2", "tai2", "haa5", "leoi5min6", "jau5", "di1", "mat1je5", "zau2dim3", "aa3", "VQ2", "tung4maai4", "jau5", "di1", "mat1je5", "je5", "waan2", "laa3", "VQ1", "zik6cing4", "ho2ji5", "zou1", "VQ2", "ze3", "beng2", "daai2", "bei2", "nei5", "faan1", "uk1kei2", "tai2", "wo3", "VQ1" ]
[ 18, 21, 83, 18, 21, 21, 9, 26, 83, 64, 21, 81, 75, 62, 50, 64, 118, 83, 75, 60, 64, 75, 118, 83, 75, 72, 26, 76, 62, 64, 50, 116, 83, 18, 76, 62, 64, 50, 75, 116, 83, 21, 81, 75, 83, 75, 62, 50, 60, 64, 75, 50, 75, 116, 83 ]
[ 2, 4, 0, 2, 4, 4, 11, 4, 0, 10, 4, 8, 5, 14, 14, 10, 6, 0, 5, 3, 10, 5, 6, 0, 5, 6, 4, 5, 14, 10, 14, 6, 0, 2, 5, 14, 10, 14, 5, 6, 0, 4, 8, 5, 0, 5, 14, 14, 3, 10, 5, 14, 5, 6, 0 ]
"TN001-DR300497-GAM2A"
"B"
24
[ "噉", "呀", ",", "我", "好", "啲", "喎", "。" ]
[ "gam2", "aa4", "VQ2", "ngo5", "hou2", "di1", "wo3", "VQ1" ]
[ 18, 116, 83, 64, 9, 72, 116, 83 ]
[ 2, 6, 0, 10, 11, 6, 6, 0 ]
"TN001-DR300497-DIM2G"
"A"
25
[ "點解", "啊", "?" ]
[ "dim2gaai2", "aa3", "VQ6" ]
[ 64, 116, 83 ]
[ 10, 6, 0 ]
"TN001-DR300497-JAN1W"
"B"
26
[ "因為", "我", "老公", "做", "哩", "行", "。", "我", "可以", "唔使", "話", "。", "乜嘢", "都", "問", "佢", "。", "我", "唔使", "記", "。" ]
[ "jan1wai6", "ngo5", "lou5gung1", "zou6", "ni1", "hong4", "VQ1", "ngo5", "ho2ji5", "m4sai2", "waa6", "VQ1", "mat1je5", "dou1", "man6", "keoi5", "VQ1", "ngo5", "m4sai2", "gei3", "VQ1" ]
[ 18, 64, 50, 75, 64, 50, 83, 64, 81, 81, 75, 83, 64, 21, 75, 64, 83, 64, 81, 75, 83 ]
[ 2, 10, 14, 5, 10, 14, 0, 10, 8, 8, 5, 0, 10, 4, 5, 10, 0, 10, 8, 5, 0 ]
"TN001-DR300497-HEOI3"
"A"
27
[ "去", "過", "幾", "次", "唧", ",", "你", "老公", "Orlando", "嗰度", "?" ]
[ "heoi3", "gwo3", "gei2", "ci3", "zek1", "VQ2", "nei5", "lou5gung1", "Orlando0", "go2dou6", "VQ6" ]
[ 75, 72, 64, 62, 116, 83, 64, 50, 102, 64, 83 ]
[ 5, 6, 10, 14, 6, 0, 10, 14, 15, 10, 0 ]
"TN001-DR300497-NGO5M"
"B"
28
[ "我", "唔", "知", "𡃉", ",", "噉", "平", "咪", "去", "囖", "。" ]
[ "ngo5", "m4", "zi1", "gaa3", "VQ2", "gam2", "peng4", "mai6", "heoi3", "lo1", "VQ1" ]
[ 64, 21, 75, 116, 83, 18, 9, 21, 75, 116, 83 ]
[ 10, 4, 5, 6, 0, 2, 11, 4, 5, 6, 0 ]
"TN001-DR300497-HAA2V"
"A"
29
[ "吓", "?" ]
[ "haa2", "VQ6" ]
[ 24, 83 ]
[ 7, 0 ]
"TN001-DR300497-ZIK1H"
"B"
30
[ "即係", "不過", "要", "係", "淡季", "嘅", "時候", "呢", "。", "就", "唔", "唔", "-", ",", "一定", "唔", "會", "七", "八月", "𡃉", "嚹", "。" ]
[ "zik1hai6", "bat1gwo3", "jiu3", "hai6", "daam6gwai3", "ge3", "si4hau6", "ne1", "VQ1", "zau6", "m4", "m4", "VQ2", "VQ2", "jat1ding6", "m4", "wui5", "cat1", "baat3jyut6", "gaa3", "laa3", "VQ1" ]
[ 21, 18, 81, 75, 13, 72, 50, 116, 83, 21, 21, 21, 83, 83, 21, 21, 81, 46, 68, 116, 116, 83 ]
[ 4, 2, 8, 5, 14, 6, 14, 6, 0, 4, 4, 4, 0, 0, 4, 4, 8, 9, 4, 6, 6, 0 ]
"TN001-DR300497-HAA2V"
"A"
31
[ "吓", ",", "咪", "重", "好", "。", "唔使", "咁", "多", "人", "排長", "-", ",", "排隊", "。" ]
[ "haa2", "VQ2", "mai6", "zung6", "hou2", "VQ1", "m4sai2", "gam3", "do1", "jan4", "paai4coeng4", "VQ2", "VQ2", "paai4deoi2", "VQ1" ]
[ 24, 83, 21, 21, 9, 83, 75, 21, 9, 50, 75, 83, 83, 75, 83 ]
[ 7, 0, 4, 4, 11, 0, 5, 4, 11, 14, 5, 0, 0, 5, 0 ]
"TN001-DR300497-GAM2H"
"B"
32
[ "噉", "係", "啊", "。", "但係", "要", "就", "啱", "佢", "啲", "假期", "囖", "。" ]
[ "gam2", "hai6", "aa3", "VQ1", "daan6hai6", "jiu3", "zau6", "aam1", "keoi5", "di1", "gaa3kei4", "lo1", "VQ1" ]
[ 18, 75, 116, 83, 18, 81, 75, 9, 64, 62, 50, 116, 83 ]
[ 2, 5, 6, 0, 2, 8, 5, 11, 10, 14, 14, 6, 0 ]
"TN001-DR300497-NGO5S"
"A"
33
[ "我", "上次", "去", ",", "七", "八月", "嗰陣時", "去", "呢", ",", "排", "一", "笪", "地方", "玩", "啊", "都", "成", "半", "粒", "鐘", "啊" ]
[ "ngo5", "soeng6ci3", "heoi3", "VQ2", "cat1", "baat3jyut6", "go2zan6si4", "heoi3", "ne1", "VQ2", "paai4", "jat1", "daat3", "dei6fong1", "waan2", "aa3", "dou1", "seng4", "bun3", "lap1", "zung1", "aa3" ]
[ 64, 68, 75, 83, 46, 68, 64, 75, 118, 83, 78, 46, 62, 50, 75, 116, 21, 46, 46, 62, 50, 116 ]
[ 10, 4, 5, 0, 9, 4, 10, 5, 6, 0, 5, 9, 14, 14, 5, 6, 4, 9, 9, 14, 14, 6 ]
"TN001-DR300497-CAA1M"
"B"
34
[ "差唔多", "咋", "我", "上次", "去", "都", "係", "。", "淡季", "去", "都", "要", "啊", "。" ]
[ "caa1m4do1", "zaa3", "ngo5", "soeng6ci3", "heoi3", "dou1", "hai6", "VQ1", "daam6gwai3", "heoi3", "dou1", "jiu3", "aa3", "VQ1" ]
[ 21, 116, 64, 68, 75, 21, 75, 83, 13, 75, 21, 81, 116, 83 ]
[ 4, 6, 10, 4, 5, 4, 5, 0, 14, 5, 4, 8, 6, 0 ]
"TN001-DR300497-DAAM6"
"A"
35
[ "淡季", "去", "你", "幾時", "啊", "?" ]
[ "daam6gwai3", "heoi3", "nei5", "gei2si4", "aa3", "VQ6" ]
[ 13, 75, 64, 64, 116, 83 ]
[ 14, 5, 10, 10, 6, 0 ]
"TN001-DR300497-GAU2J"
"B"
36
[ "九月", "中", "。" ]
[ "gau2jyut6", "zung1", "VQ1" ]
[ 68, 26, 83 ]
[ 4, 4, 0 ]
"TN001-DR300497-HAI6L"
"A"
37
[ "係", "囖", "。", "九月", "中", "都", "咁", "多", "人", "去", "呀", "。" ]
[ "hai6", "lo1", "VQ1", "gau2jyut6", "zung1", "dou1", "gam3", "do1", "jan4", "heoi3", "aa4", "VQ1" ]
[ 75, 116, 83, 68, 26, 21, 21, 9, 50, 75, 116, 83 ]
[ 5, 6, 0, 4, 4, 4, 4, 11, 14, 5, 6, 0 ]
"TN001-DR300497-HAI6A"
"B"
38
[ "係", "啊", "。" ]
[ "hai6", "aa3", "VQ1" ]
[ 75, 116, 83 ]
[ 5, 6, 0 ]
"TN001-DR300497-GO2BI"
"A"
39
[ "嗰邊", "都", "有", "人", "未", "放", "完", "暑假", "𡃉", "咩", "?" ]
[ "go2bin1", "dou1", "jau5", "jan4", "mei6", "fong3", "jyun4", "syu2gaa3", "gaa3", "me1", "VQ6" ]
[ 64, 21, 76, 50, 21, 78, 72, 50, 116, 116, 83 ]
[ 10, 4, 5, 14, 4, 5, 6, 14, 6, 6, 0 ]
"TN001-DR300497-M4CIN"
"B"
40
[ "唔", "清楚", "。", "但係", "有啲", "-", "有啲", "遊戲", "係", "要", "排", "咁", "耐", "," ]
[ "m4", "cing1co2", "VQ1", "daan6hai6", "jau5di1", "VQ2", "jau5di1", "jau4hei3", "hai6", "jiu3", "paai4", "gam3", "noi6", "VQ2" ]
[ 21, 9, 83, 18, 64, 83, 64, 50, 75, 81, 78, 21, 9, 83 ]
[ 4, 11, 0, 2, 10, 0, 10, 14, 5, 8, 5, 4, 11, 0 ]
"TN001-DR300497-ZIK1H"
"A"
41
[ "即係", "好多" ]
[ "zik1hai6", "hou2do1" ]
[ 21, 46 ]
[ 4, 9 ]
"TN001-DR300497-DAAN6"
"B"
42
[ "但係", "有啲", "唔使", "。", "有啲", "好", "快", "。" ]
[ "daan6hai6", "jau5di1", "m4sai2", "VQ1", "jau5di1", "hou2", "faai3", "VQ1" ]
[ 18, 64, 81, 83, 64, 21, 9, 83 ]
[ 2, 10, 8, 0, 10, 4, 11, 0 ]
"TN001-DR300497-HAI6M"
"A"
43
[ "係", "咩", "?" ]
[ "hai6", "me1", "VQ6" ]
[ 75, 116, 83 ]
[ 5, 6, 0 ]
"TN001-DR300497-DAAN6"
"B"
44
[ "但係", "都", "唔係", "好", "迫人", "囖", "。", "變", "咗", "你", "可以", "睇", "嘢", ",", "睇", "煙花", "嗰啲", "呢", "就", "唔使", "咁", "迫", "。" ]
[ "daan6hai6", "dou1", "m4hai6", "hou2", "bik1jan4", "lo1", "VQ1", "bin3", "zo2", "nei5", "ho2ji5", "tai2", "je5", "VQ2", "tai2", "jin1faa1", "go2di1", "ne1", "zau6", "m4sai2", "gam3", "bik1", "VQ1" ]
[ 18, 21, 75, 21, 9, 116, 83, 75, 72, 64, 81, 75, 50, 83, 75, 50, 64, 118, 21, 81, 21, 9, 83 ]
[ 2, 4, 5, 4, 11, 6, 0, 5, 6, 10, 8, 5, 14, 0, 5, 14, 10, 6, 4, 8, 4, 11, 0 ]
"TN001-DR300497-HAI6L"
"A"
45
[ "係", "囖", ",", "係", "囖", "。", "好多", "人", "嚹", "我", "上次", "。", "等", "到", "頸", "都", "長", "埋", "啊", "。", "來", "嚹", ",", "下次", "去", "邊", "嚹", "?" ]
[ "hai6", "lo1", "VQ2", "hai6", "lo1", "VQ1", "hou2do1", "jan4", "laa3", "ngo5", "soeng6ci3", "VQ1", "dang2", "dou3", "geng2", "dou1", "coeng4", "maai4", "aa3", "VQ1", "lai4", "laa3", "VQ2", "haa6ci3", "heoi3", "bin1", "laa3", "VQ6" ]
[ 75, 116, 83, 75, 116, 83, 46, 50, 116, 64, 68, 83, 75, 72, 50, 21, 9, 72, 116, 83, 75, 116, 83, 68, 75, 64, 116, 83 ]
[ 5, 6, 0, 5, 6, 0, 9, 14, 6, 10, 4, 0, 5, 6, 14, 4, 11, 6, 6, 0, 5, 6, 0, 4, 5, 10, 6, 0 ]
"TN001-DR300497-MEI6D"
"B"
46
[ "未", "定", "啊", "。", "要", "等", "我", "老公", ",", "睇", "下", "佢", "嗰度", "有", "乜嘢", "平", "嘅", ",", "即係", "平", "嘅", "package", ",", "咪", "睇", "下", "去", "邊", "囖", "。" ]
[ "mei6", "ding6", "aa3", "VQ1", "jiu3", "dang2", "ngo5", "lou5gung1", "VQ2", "tai2", "haa5", "keoi5", "go2dou6", "jau5", "mat1je5", "peng4", "ge3", "VQ2", "zik1hai6", "peng4", "ge3", "package0", "VQ2", "mai6", "tai2", "haa5", "heoi3", "bin1", "lo1", "VQ1" ]
[ 21, 75, 116, 83, 81, 75, 64, 50, 83, 75, 72, 64, 64, 76, 64, 9, 72, 83, 21, 9, 72, 99, 83, 21, 75, 72, 75, 64, 116, 83 ]
[ 4, 5, 6, 0, 8, 5, 10, 14, 0, 5, 6, 10, 10, 5, 10, 11, 6, 0, 4, 11, 6, 14, 0, 4, 5, 6, 5, 10, 6, 0 ]
"TN001-DR300497-NEI5S"
"A"
47
[ "你", "想", "去", "邊", "喇", "噉樣", "?" ]
[ "nei5", "soeng2", "heoi3", "bin1", "laa1", "gam2joeng2", "VQ6" ]
[ 64, 81, 75, 64, 116, 64, 83 ]
[ 10, 8, 5, 10, 6, 10, 0 ]
"TN001-DR300497-DAAN6"
"B"
48
[ "但係", "我", "最", "想", "去", "澳洲", "紐西蘭", "嗰邊", ",", "因為", "都", "未", "去", "過", "。" ]
[ "daan6hai6", "ngo5", "zeoi3", "soeng2", "heoi3", "ou3zau1", "nau5sai1laan4", "go2bin1", "VQ2", "jan1wai6", "dou1", "mei6", "heoi3", "gwo3", "VQ1" ]
[ 18, 64, 21, 81, 75, 54, 54, 64, 83, 18, 21, 21, 75, 72, 83 ]
[ 2, 10, 4, 8, 5, 15, 15, 10, 0, 2, 4, 4, 5, 6, 0 ]
"TN001-DR300497-HAI6A"
"A"
49
[ "係", "呀", "?", "澳洲", "你", "都", "未", "去", "過", "咩", "?" ]
[ "hai6", "aa4", "VQ6", "ou3zau1", "nei5", "dou1", "mei6", "heoi3", "gwo3", "me1", "VQ6" ]
[ 75, 116, 83, 54, 64, 21, 21, 75, 72, 116, 83 ]
[ 5, 6, 0, 15, 10, 4, 4, 5, 6, 6, 0 ]
"TN001-DR300497-M6VQ2"
"B"
50
[ "嗯", ",", "未", "啊", "。" ]
[ "m6", "VQ2", "mei6", "aa3", "VQ1" ]
[ 24, 83, 21, 116, 83 ]
[ 7, 0, 4, 6, 0 ]
"TN001-DR300497-O3VQ2"
"A"
51
[ "哦", ",", "我", "以為", "淨係", "未", ",", "紐西蘭", "未", "去", "過", "𠻹", "。", "不過", "我", "都", "想", "去", "紐西蘭", "。" ]
[ "o3", "VQ2", "ngo5", "ji5wai4", "zing6hai6", "mei6", "VQ2", "nau5sai1laan4", "mei6", "heoi3", "gwo3", "tim1", "VQ1", "bat1gwo3", "ngo5", "dou1", "soeng2", "heoi3", "nau5sai1laan4", "VQ1" ]
[ 24, 83, 64, 75, 21, 21, 83, 54, 21, 75, 72, 116, 83, 18, 64, 21, 81, 75, 54, 83 ]
[ 7, 0, 10, 5, 4, 4, 0, 15, 4, 5, 6, 6, 0, 2, 10, 4, 8, 5, 15, 0 ]
"TN001-DR300497-M6VQ2"
"B"
52
[ "嗯", ",", "咪", "去", "親", "都", "係", "棱", "埋", "紐西蘭", "一齊", "𡃉", "喇", "?" ]
[ "m6", "VQ2", "mai6", "heoi3", "can1", "dou1", "hai6", "lang3", "maai4", "nau5sai1laan4", "jat1cai4", "gaa3", "laa1", "VQ6" ]
[ 24, 83, 21, 75, 72, 21, 75, 75, 72, 54, 21, 116, 116, 83 ]
[ 7, 0, 4, 5, 6, 4, 5, 5, 6, 15, 4, 6, 6, 0 ]
"TN001-DR300497-M4HAI"
"A"
53
[ "唔係", "啊", "。", "上次", "我", "淨係", "去", "澳洲", "咋", "。", "冇", "去", "紐西蘭", "嚹", "。" ]
[ "m4hai6", "aa3", "VQ1", "soeng6ci3", "ngo5", "zing6hai6", "heoi3", "ou3zau1", "zaa3", "VQ1", "mou5", "heoi3", "nau5sai1laan4", "laa3", "VQ1" ]
[ 75, 116, 83, 68, 64, 21, 75, 54, 116, 83, 76, 75, 54, 116, 83 ]
[ 5, 6, 0, 4, 10, 4, 5, 15, 6, 0, 5, 5, 15, 6, 0 ]
"TN001-DR300497-M4HAI"
"B"
54
[ "唔係", "即係", "嗰頭", ",", "同埋", "嗰", "-", "嗰邊", "。", "噉", "變", "咗", "你", "係", "嘞", ",", "唔使", "坐", "咁", "耐", "機", "喇", ",", "一次過", "坐", "晒", "咯", "。" ]
[ "m4hai6", "zik1hai6", "go2tau4", "VQ2", "tung4maai4", "go2", "VQ2", "go2bin1", "VQ1", "gam2", "bin3", "zo2", "nei5", "hai6", "laak3", "VQ2", "m4sai2", "co5", "gam3", "noi6", "gei1", "laa1", "VQ2", "jat1ci3gwo3", "co5", "saai3", "lok3", "VQ1" ]
[ 75, 21, 64, 83, 18, 64, 83, 64, 83, 18, 75, 72, 64, 75, 116, 83, 81, 75, 21, 9, 52, 84, 83, 21, 75, 72, 116, 83 ]
[ 5, 4, 10, 0, 2, 10, 0, 10, 0, 2, 5, 6, 10, 5, 6, 0, 8, 5, 4, 11, 14, 1, 0, 4, 5, 6, 6, 0 ]
"TN001-DR300497-M4SAI"
"A"
55
[ "唔使", "分開", "兩", "次", "。", "不過", "新西", "-", "紐西蘭", "係", "唔係", "都", "-", "紐西蘭", "定", "新西蘭", "唧", "究竟", "?" ]
[ "m4sai2", "fan1hoi1", "loeng5", "ci3", "VQ1", "bat1gwo3", "san1sai1", "VQ2", "nau5sai1laan4", "hai6", "m4hai6", "dou1", "VQ2", "nau5sai1laan4", "ding6", "san1sai1laan4", "zek1", "gau3ging2", "VQ6" ]
[ 81, 75, 46, 62, 83, 18, 55, 83, 54, 75, 75, 21, 83, 54, 18, 54, 116, 21, 83 ]
[ 8, 5, 9, 14, 0, 2, 15, 0, 15, 5, 5, 4, 0, 15, 2, 15, 6, 4, 0 ]
"TN001-DR300497-E6VQ2"
"B"
56
[ "誒", ",", "有", "人", "叫", "New Zealand", "即係", "紐", ",", "又", "照", "譯", "紐西蘭", "。", "有", "啲", "叫做", "新西蘭", "。", "其實", "都", "係", "一樣", ",", "一", "個", "地方", "。" ]
[ "e6", "VQ2", "jau5", "jan4", "giu3", "New_Zealand0", "zik1hai6", "nau5", "VQ2", "jau6", "ziu3", "jik6", "nau5sai1laan4", "VQ1", "jau5", "di1", "giu3zou6", "san1sai1laan4", "VQ1", "kei4sat6", "dou1", "hai6", "jat1joeng6", "VQ2", "jat1", "go3", "dei6fong1", "VQ1" ]
[ 24, 83, 76, 50, 75, 102, 21, 84, 83, 21, 77, 75, 54, 83, 76, 62, 75, 54, 83, 21, 21, 75, 9, 83, 46, 62, 50, 83 ]
[ 7, 0, 5, 14, 5, 15, 4, 1, 0, 4, 4, 5, 15, 0, 5, 14, 5, 15, 0, 4, 4, 5, 11, 0, 9, 14, 14, 0 ]
"TN001-DR300497-HAI6L"
"A"
57
[ "係", "囖", "。", "好似", "幾", "靚", "喎", "啲", "風景", ",", "係", "唔係", "啊", "?" ]
[ "hai6", "lo1", "VQ1", "hou2ci5", "gei2", "leng3", "wo3", "di1", "fung1ging2", "VQ2", "hai6", "m4hai6", "aa3", "VQ6" ]
[ 75, 116, 83, 75, 21, 9, 116, 62, 50, 83, 75, 75, 116, 83 ]
[ 5, 6, 0, 5, 4, 11, 6, 14, 14, 0, 5, 5, 6, 0 ]
"TN001-DR300497-M6VQ2"
"B"
58
[ "嗯", ",", "係", "啊", "。" ]
[ "m6", "VQ2", "hai6", "aa3", "VQ1" ]
[ 24, 83, 75, 116, 83 ]
[ 7, 0, 5, 6, 0 ]
"TN001-DR300497-HAI6L"
"A"
59
[ "係", "囖", "。" ]
[ "hai6", "lo1", "VQ1" ]
[ 75, 116, 83 ]
[ 5, 6, 0 ]
"TN001-DR300497-JAU5H"
"B"
60
[ "有", "海豚", "定係", "有", "鯨魚", "睇", "𡃉", "。", "有啲", "觀團", ",", "可以", "出海", "。" ]
[ "jau5", "hoi2tyun4", "ding6hai6", "jau5", "king4jyu4", "tai2", "gaa3", "VQ1", "jau5di1", "gun1tyun4", "VQ2", "ho2ji5", "ceot1hoi2", "VQ1" ]
[ 76, 50, 18, 76, 50, 75, 116, 83, 64, 50, 83, 81, 75, 83 ]
[ 5, 14, 2, 5, 14, 5, 6, 0, 10, 14, 0, 8, 5, 0 ]
"TN001-DR300497-NEI5D"
"A"
61
[ "你", "都", "唔", "游水", "。" ]
[ "nei5", "dou1", "m4", "jau4seoi2", "VQ1" ]
[ 64, 21, 21, 75, 83 ]
[ 10, 4, 4, 5, 0 ]
"TN001-DR300497-DAAN6"
"B"
62
[ "但係", "要", "-", "唔係", "啊", ",", "嗰啲", "要", "夾", "啱", "時間", "𡃉", "。", "要", "睇", "佢", "個", "嗰", "嗰", "段", "時間", "啱", "呢", "就", "有", "一", "羣", "𡃉", "。" ]
[ "daan6hai6", "jiu3", "VQ2", "m4hai6", "aa3", "VQ2", "go2di1", "jiu3", "gaap3", "aam1", "si4gaan3", "gaa3", "VQ1", "jiu3", "tai2", "keoi5", "go3", "go2", "go2", "dyun6", "si4gaan3", "aam1", "ne1", "zau6", "jau5", "jat1", "kwan4", "gaa3", "VQ1" ]
[ 18, 81, 83, 75, 116, 83, 64, 81, 75, 9, 50, 116, 83, 81, 75, 64, 62, 64, 64, 62, 50, 9, 118, 21, 76, 46, 62, 116, 83 ]
[ 2, 8, 0, 5, 6, 0, 10, 8, 5, 11, 14, 6, 0, 8, 5, 10, 14, 10, 10, 14, 14, 11, 6, 4, 5, 9, 14, 6, 0 ]
"TN001-DR300497-NI1GO"
"A"
63
[ "哩個", "去", "-", "哩個", "去", "帕斯", "咋", "喎", "。" ]
[ "ni1go3", "heoi3", "VQ2", "ni1go3", "heoi3", "paak3si1", "zaa3", "wo3", "VQ1" ]
[ 64, 75, 83, 64, 75, 54, 116, 116, 83 ]
[ 10, 5, 0, 10, 5, 15, 6, 6, 0 ]
"TN001-DR300497-HAI6L"
"B"
64
[ "係", "囖", "咪", "澳洲", "嗰邊", "囖", ",", "Perth", "。" ]
[ "hai6", "lo1", "mai6", "ou3zau1", "go2bin1", "lo1", "VQ2", "Perth0", "VQ1" ]
[ 75, 116, 21, 54, 64, 116, 83, 102, 83 ]
[ 5, 6, 4, 15, 10, 6, 0, 15, 0 ]
"TN001-DR300497-HAI6A"
"A"
65
[ "係", "啊", ",", "係", "啊", "。" ]
[ "hai6", "aa3", "VQ2", "hai6", "aa3", "VQ1" ]
[ 75, 116, 83, 75, 116, 83 ]
[ 5, 6, 0, 5, 6, 0 ]
"TN001-DR300497-HAI6L"
"B"
66
[ "係", "囖", "。" ]
[ "hai6", "lo1", "VQ1" ]
[ 75, 116, 83 ]
[ 5, 6, 0 ]
"TN001-DR300497-BAT1G"
"A"
67
[ "不過", "澳洲", "好似", "冇", "乜", ",", "但係", "哩個", "Perth", "好似", "好", "," ]
[ "bat1gwo3", "ou3zau1", "hou2ci5", "mou5", "mat1", "VQ2", "daan6hai6", "ni1go3", "Perth0", "hou2ci5", "hou2", "VQ2" ]
[ 18, 54, 75, 76, 64, 83, 18, 64, 102, 75, 21, 83 ]
[ 2, 15, 5, 5, 10, 0, 2, 10, 15, 5, 4, 0 ]
"TN001-DR300497-DAAN6"
"B"
68
[ "但係", "你", "join", "佢", "嗰啲", "tour", "都", "-", "都", "未必", "一定", "包", "一定", "睇", "得", "到", "。", "即係", "佢", "嗰個", "叫做", "唔", "知", "觀", "鯨", "團", "定係", "乜嘢", "噉樣", "𡃉", "。", "就", "唔", "一定", "包", "你", "睇", "到", "囖", "。", "即係", "佢", "會", "有", "-", "有", "隻", "船", "出海", "。", "噉樣", "就" ]
[ "daan6hai6", "nei5", "join0", "keoi5", "go2di1", "tour0", "dou1", "VQ2", "dou1", "mei6bit1", "jat1ding6", "baau1", "jat1ding6", "tai2", "dak1", "dou2", "VQ1", "zik1hai6", "keoi5", "go2go3", "giu3zou6", "m4", "zi1", "gun1", "king4", "tyun4", "ding6hai6", "mat1je5", "gam2joeng2", "gaa3", "VQ1", "zau6", "m4", "jat1ding6", "baau1", "nei5", "tai2", "dou2", "lo1", "VQ1", "zik1hai6", "keoi5", "wui5", "jau5", "VQ2", "jau5", "zek3", "syun4", "ceot1hoi2", "VQ1", "gam2joeng2", "zau6" ]
[ 18, 64, 112, 64, 64, 99, 21, 83, 21, 21, 21, 75, 21, 75, 72, 72, 83, 21, 64, 64, 75, 21, 75, 75, 50, 50, 18, 64, 64, 116, 83, 21, 21, 21, 75, 64, 75, 72, 116, 83, 21, 64, 81, 76, 83, 76, 62, 50, 75, 83, 64, 21 ]
[ 2, 10, 5, 10, 10, 14, 4, 0, 4, 4, 4, 5, 4, 5, 6, 6, 0, 4, 10, 10, 5, 4, 5, 5, 14, 14, 2, 10, 10, 6, 0, 4, 4, 4, 5, 10, 5, 6, 6, 0, 4, 10, 8, 5, 0, 5, 14, 14, 5, 0, 10, 4 ]
"TN001-DR300497-ZIK1H"
"A"
69
[ "即係", "到時", "自己", "join", "嗰啲", "local", "tour", "嚹", "喎", ",", "意思", "係", "。" ]
[ "zik1hai6", "dou3si4", "zi6gei2", "join0", "go2di1", "local0", "tour0", "laa3", "wo3", "VQ2", "ji3si1", "hai6", "VQ1" ]
[ 21, 68, 64, 112, 64, 85, 99, 116, 116, 83, 50, 75, 83 ]
[ 4, 4, 10, 5, 10, 11, 14, 6, 6, 0, 14, 5, 0 ]
"TN001-DR300497-E6VQ2"
"B"
70
[ "誒", ",", "如果", "係", "自己", "去", "就", "係", "囖", "。", "係", "囖", ",", "即係", "佢", "有", "啲", "噉", "嘅", "團", "𡃉", "直頭", "。" ]
[ "e6", "VQ2", "jyu4gwo2", "hai6", "zi6gei2", "heoi3", "zau6", "hai6", "lo1", "VQ1", "hai6", "lo1", "VQ2", "zik1hai6", "keoi5", "jau5", "di1", "gam2", "ge3", "tyun4", "gaa3", "zik6tau4", "VQ1" ]
[ 24, 83, 18, 75, 64, 75, 21, 75, 116, 83, 75, 116, 83, 18, 64, 76, 62, 64, 72, 50, 116, 21, 83 ]
[ 7, 0, 2, 5, 10, 5, 4, 5, 6, 0, 5, 6, 0, 2, 10, 5, 14, 10, 6, 14, 6, 4, 0 ]
"TN001-DR300497-HAI6M"
"A"
71
[ "係", "咩", "?" ]
[ "hai6", "me1", "VQ6" ]
[ 75, 116, 83 ]
[ 5, 6, 0 ]
"TN001-DR300497-HAI6A"
"B"
72
[ "係", "啊", "。" ]
[ "hai6", "aa3", "VQ1" ]
[ 75, 116, 83 ]
[ 5, 6, 0 ]
"TN001-DR300497-ZING6"
"A"
73
[ "淨係", "睇", "鯨魚", "呀", "?", "有", "乜嘢", "睇", "啊", "?" ]
[ "zing6hai6", "tai2", "king4jyu4", "aa4", "VQ6", "jau5", "mat1je5", "tai2", "aa3", "VQ6" ]
[ 21, 75, 50, 116, 83, 76, 64, 75, 116, 83 ]
[ 4, 5, 14, 6, 0, 5, 10, 5, 6, 0 ]
"TN001-DR300497-HAI2G"
"B"
74
[ "喺", "個", "海", "度", "。", "噉", "係", "啊", ",", "係", "睇", "海", "啊", ",", "睇", "珊瑚", "嗰啲", "嘢", "囖", "。" ]
[ "hai2", "go3", "hoi2", "dou6", "VQ1", "gam2", "hai6", "aa3", "VQ2", "hai6", "tai2", "hoi2", "aa3", "VQ2", "tai2", "saan1wu4", "go2di1", "je5", "lo1", "VQ1" ]
[ 60, 62, 50, 64, 83, 18, 75, 116, 83, 75, 75, 50, 116, 83, 75, 50, 64, 50, 116, 83 ]
[ 3, 14, 14, 10, 0, 2, 5, 6, 0, 5, 5, 14, 6, 0, 5, 14, 10, 14, 6, 0 ]
"TN001-DR300497-O3ZIK"
"A"
75
[ "哦", "即係", "餵", "佢", "食", "嘢", "嗰啲", "嚹", "喎", "。" ]
[ "o3", "zik1hai6", "wai3", "keoi5", "sik6", "je5", "go2di1", "laa3", "wo3", "VQ1" ]
[ 24, 21, 75, 64, 75, 50, 64, 116, 116, 83 ]
[ 7, 4, 5, 10, 5, 14, 10, 6, 6, 0 ]
"TN001-DR300497-M4HAI"
"B"
76
[ "唔係", "啊", "。", "佢", "哩個", "你", "係", "○", "佢", "嗰", "隻", "唔係", "𡃉", "佢", "嗰", "隻", "係", "睇", "天然", "嗰啲", "𡃉", "喎", ",", "唔係", "人哋", "養", "𡃉", "喎", "。", "直頭", "喺", "個", "海", "你", "係", "等", "佢", "。", "即係", "可能", "嗰個", "時期", "係", "一", "群", "噉", "出來", "呢", ",", "所以", "有", "時間", "性", "囖", "。", "你", "要", "睇", "啱", "邊個", "時間", "去", ",", "先至", "有得", "睇", "𡃉", "。", "唔係", "你", "要", "去", "就", "有得", "睇", "𡃉", "嚹", "。" ]
[ "m4hai6", "aa3", "VQ1", "keoi5", "ni1go3", "nei5", "hai6", "#", "keoi5", "go2", "zek3", "m4hai6", "gaa3", "keoi5", "go2", "zek3", "hai6", "tai2", "tin1jin4", "go2di1", "gaa3", "wo3", "VQ2", "m4hai6", "jan4dei6", "joeng5", "gaa3", "wo3", "VQ1", "zik6tau4", "hai2", "go3", "hoi2", "nei5", "hai6", "dang2", "keoi5", "VQ1", "zik1hai6", "ho2nang4", "go2go3", "si4kei4", "hai6", "jat1", "kwan4", "gam2", "ceot1lai4", "ne1", "VQ2", "so2ji5", "jau5", "si4gaan3", "sing3", "lo1", "VQ1", "nei5", "jiu3", "tai2", "aam1", "bin1go3", "si4gaan3", "heoi3", "VQ2", "sin1zi3", "jau5dak1", "tai2", "gaa3", "VQ1", "m4hai6", "nei5", "jiu3", "heoi3", "zau6", "jau5dak1", "tai2", "gaa3", "laa3", "VQ1" ]
[ 75, 116, 83, 64, 64, 64, 75, 2, 64, 64, 62, 75, 116, 64, 64, 62, 75, 75, 15, 64, 116, 116, 83, 75, 64, 75, 116, 116, 83, 21, 60, 62, 50, 64, 75, 75, 64, 83, 21, 81, 64, 50, 75, 46, 62, 64, 75, 118, 83, 18, 76, 50, 41, 116, 83, 64, 81, 75, 9, 64, 50, 75, 83, 21, 81, 75, 116, 83, 75, 64, 81, 75, 21, 81, 75, 116, 116, 83 ]
[ 5, 6, 0, 10, 10, 10, 5, 1, 10, 10, 14, 5, 6, 10, 10, 14, 5, 5, 11, 10, 6, 6, 0, 5, 10, 5, 6, 6, 0, 4, 3, 14, 14, 10, 5, 5, 10, 0, 4, 8, 10, 14, 5, 9, 14, 10, 5, 6, 0, 2, 5, 14, 1, 6, 0, 10, 8, 5, 11, 10, 14, 5, 0, 4, 8, 5, 6, 0, 5, 10, 8, 5, 4, 8, 5, 6, 6, 0 ]
"TN001-DR300497-O3VQ2"
"A"
77
[ "哦", ",", "即係", "睇", "下", "你", "幾時", "去", ",", "又", "撞", "啱", "有", "啲", "團", ",", "噉", "先", "有得", "參加", "囖", "。", "係", "唔係", "啊", "?" ]
[ "o3", "VQ2", "zik1hai6", "tai2", "haa5", "nei5", "gei2si4", "heoi3", "VQ2", "jau6", "zong6", "aam1", "jau5", "di1", "tyun4", "VQ2", "gam2", "sin1", "jau5dak1", "caam1gaa1", "lo1", "VQ1", "hai6", "m4hai6", "aa3", "VQ6" ]
[ 24, 83, 18, 75, 72, 64, 64, 75, 83, 21, 75, 9, 76, 62, 50, 83, 18, 21, 81, 75, 116, 83, 75, 75, 116, 83 ]
[ 7, 0, 2, 5, 6, 10, 10, 5, 0, 4, 5, 11, 5, 14, 14, 0, 2, 4, 8, 5, 6, 0, 5, 5, 6, 0 ]
"TN001-DR300497-DAAN6"
"B"
78
[ "但係", "我", "唔", "知", "邊個", "月份", "係", "睇", "鯨魚", "最", "好", "囖", "。" ]
[ "daan6hai6", "ngo5", "m4", "zi1", "bin1go3", "jyut6fan6", "hai6", "tai2", "king4jyu4", "zeoi3", "hou2", "lo1", "VQ1" ]
[ 18, 64, 21, 75, 62, 50, 75, 75, 50, 21, 9, 116, 83 ]
[ 2, 10, 4, 5, 14, 14, 5, 5, 14, 4, 11, 6, 0 ]
"TN001-DR300497-SYUN3"
"A"
79
[ "算", "喇", "。", "我", "對", "鯨魚", "都", "冇乜", "興趣", "。" ]
[ "syun3", "laa1", "VQ1", "ngo5", "deoi3", "king4jyu4", "dou1", "mou5mat1", "hing3ceoi3", "VQ1" ]
[ 75, 116, 83, 64, 60, 50, 21, 64, 50, 83 ]
[ 5, 6, 0, 10, 3, 14, 4, 10, 14, 0 ]
"TN001-DR300497-AA3VQ"
"B"
80
[ "啊", ",", "我", "想", "睇", "下", "喎", "。" ]
[ "aa3", "VQ2", "ngo5", "soeng2", "tai2", "haa5", "wo3", "VQ1" ]
[ 24, 83, 64, 81, 75, 72, 116, 83 ]
[ 7, 0, 10, 8, 5, 6, 6, 0 ]
"TN001-DR300497-HAI6M"
"A"
81
[ "係", "咩", "?" ]
[ "hai6", "me1", "VQ6" ]
[ 75, 116, 83 ]
[ 5, 6, 0 ]
"TN001-DR300497-HAI6J"
"B"
82
[ "係", "喲", "。", "因為", "未", "見", "過", "咁", "大", "條", "鯨魚", "吖", "嗎", "。" ]
[ "hai6", "jo3", "VQ1", "jan1wai6", "mei6", "gin3", "gwo3", "gam3", "daai6", "tiu4", "king4jyu4", "aa1", "maa3", "VQ1" ]
[ 75, 116, 83, 18, 21, 75, 72, 21, 9, 62, 50, 116, 116, 83 ]
[ 5, 6, 0, 2, 4, 5, 6, 4, 11, 14, 14, 6, 6, 0 ]
"TN001-DR300497-NGAAU"
"A"
83
[ "咬", "唔", "咬", "人", "𡃉", "?" ]
[ "ngaau5", "m4", "ngaau5", "jan4", "gaa3", "VQ6" ]
[ 75, 21, 75, 50, 116, 83 ]
[ 5, 4, 5, 14, 6, 0 ]
"TN001-DR300497-GAM2N"
"B"
84
[ "噉", "我", "諗", "我", "諗", "即係", "遠", "觀", "𡃉", "吖", "嗎", "。" ]
[ "gam2", "ngo5", "nam2", "ngo5", "nam2", "zik1hai6", "jyun5", "gun1", "gaa3", "aa1", "maa3", "VQ1" ]
[ 18, 64, 75, 64, 75, 21, 9, 75, 116, 116, 116, 83 ]
[ 2, 10, 5, 10, 5, 4, 11, 5, 6, 6, 6, 0 ]
"TN001-DR300497-JAU5M"
"A"
85
[ "有冇", "危險", "𡃉", "?", "大佬", "。" ]
[ "jau5mou5", "ngai4him2", "gaa3", "VQ6", "daai6lou2", "VQ1" ]
[ 76, 50, 116, 83, 50, 83 ]
[ 5, 14, 6, 0, 14, 0 ]
"TN001-DR300497-GAM2M"
"B"
86
[ "噉", "唔同", "睇", "香港", "嗰啲", "海洋公園", "嗰啲", ",", "都", "係", "細", "吖", "嗎", "屬於", "。" ]
[ "gam2", "m4tung4", "tai2", "hoeng1gong2", "go2di1", "hoi2joeng4gung1jyun2", "go2di1", "VQ2", "dou1", "hai6", "sai3", "aa1", "maa3", "suk6jyu1", "VQ1" ]
[ 18, 75, 75, 54, 62, 56, 64, 83, 21, 75, 9, 116, 116, 75, 83 ]
[ 2, 5, 5, 15, 14, 15, 10, 0, 4, 5, 11, 6, 6, 5, 0 ]
"TN001-DR300497-GAM2Z"
"A"
87
[ "噉", "就", "係", ",", "吓", "?", "海洋公園", "嗰啲", "屬於", "細", "呀", "?", "嗰啲", "大", "成", "點", "啊", "?" ]
[ "gam2", "zau6", "hai6", "VQ2", "haa2", "VQ6", "hoi2joeng4gung1jyun2", "go2di1", "suk6jyu1", "sai3", "aa4", "VQ6", "go2di1", "daai6", "seng4", "dim2", "aa3", "VQ6" ]
[ 18, 21, 75, 83, 24, 83, 56, 64, 75, 9, 116, 83, 64, 9, 72, 64, 116, 83 ]
[ 2, 4, 5, 0, 7, 0, 15, 10, 5, 11, 6, 0, 10, 11, 6, 10, 6, 0 ]
"TN001-DR300497-M4ZI1"
"B"
88
[ "唔", "知", "喲", "。", "有時", "睇", "即係", "睇", "佢", "嗰啲", "catalog", "嗰啲", "圖片", "呢", ",", "嗰", "條", "尾", "都", "好", "大", "條", "𡃉", "喎", "。" ]
[ "m4", "zi1", "jo3", "VQ1", "jau5si4", "tai2", "zik1hai6", "tai2", "keoi5", "go2di1", "catalog0", "go2di1", "tou4pin2", "ne1", "VQ2", "go2", "tiu4", "mei5", "dou1", "hou2", "daai6", "tiu4", "gaa3", "wo3", "VQ1" ]
[ 21, 75, 116, 83, 21, 75, 21, 75, 64, 64, 99, 64, 50, 118, 83, 64, 62, 50, 21, 21, 9, 62, 116, 116, 83 ]
[ 4, 5, 6, 0, 4, 5, 4, 5, 10, 10, 14, 10, 14, 6, 0, 10, 14, 14, 4, 4, 11, 14, 6, 6, 0 ]
"TN001-DR300497-TIU4M"
"A"
89
[ "條", "尾", "都", "發", "死", "人", "喇", ",", "係", "唔係", "啊", "?" ]
[ "tiu4", "mei5", "dou1", "faat3", "sei2", "jan4", "laa1", "VQ2", "hai6", "m4hai6", "aa3", "VQ6" ]
[ 62, 50, 21, 75, 75, 50, 116, 83, 75, 75, 116, 83 ]
[ 14, 14, 4, 5, 5, 14, 6, 0, 5, 5, 6, 0 ]
"TN001-DR300497-ZAU6H"
"B"
90
[ "就", "係", "想", "去", "睇", "下", "啲", "咁", ",", "即係", "天然", "嘅", "嘢", "囖", "。" ]
[ "zau6", "hai6", "soeng2", "heoi3", "tai2", "haa5", "di1", "gam3", "VQ2", "zik1hai6", "tin1jin4", "ge3", "je5", "lo1", "VQ1" ]
[ 21, 75, 81, 75, 75, 72, 62, 21, 83, 21, 15, 72, 50, 116, 83 ]
[ 4, 5, 8, 5, 5, 6, 14, 4, 0, 4, 11, 6, 14, 6, 0 ]
"TN001-DR300497-DAAN6"
"A"
91
[ "但", "會", "唔", "會", "好", "貴", "啊", "睇", "哩啲", "?", "貴", "唔", "貴", "?" ]
[ "daan6", "wui5", "m4", "wui5", "hou2", "gwai3", "aa3", "tai2", "ni1di1", "VQ6", "gwai3", "m4", "gwai3", "VQ6" ]
[ 18, 81, 21, 81, 21, 9, 116, 75, 64, 83, 9, 21, 9, 83 ]
[ 2, 8, 4, 8, 4, 11, 6, 5, 10, 0, 11, 4, 11, 0 ]
"TN001-DR300497-GAM2M"
"B"
92
[ "噉", "唔", "知", "啊", "。", "價錢", "唔", "知", "啊", "。", "睇", "下", "到時", "去", "先至", "-", "唔係", "喇", ",", "去", "先至", "check", "價錢", "啊", "嗰啲", "嘢", "囖", "。" ]
[ "gam2", "m4", "zi1", "aa3", "VQ1", "gaa3cin4", "m4", "zi1", "aa3", "VQ1", "tai2", "haa5", "dou3si4", "heoi3", "sin1zi3", "VQ2", "m4hai6", "laa1", "VQ2", "heoi3", "sin1zi3", "check0", "gaa3cin4", "aa3", "go2di1", "je5", "lo1", "VQ1" ]
[ 18, 21, 75, 116, 83, 50, 21, 75, 116, 83, 75, 72, 68, 75, 21, 83, 75, 116, 83, 75, 21, 112, 50, 116, 64, 50, 116, 83 ]
[ 2, 4, 5, 6, 0, 14, 4, 5, 6, 0, 5, 6, 4, 5, 4, 0, 5, 6, 0, 5, 4, 5, 14, 6, 10, 14, 6, 0 ]
"TN001-DR300497-BAT1G"
"A"
93
[ "不過", "唔緊要", "喇", "。", "噠", "你", "老公", "個", "朵", "。", "幾時", "度", "有得", "睇", "唧", "噉", "哩啲", ",", "知", "唔", "知", "大概", "?" ]
[ "bat1gwo3", "m4gan2jiu3", "laa1", "VQ1", "daat3", "nei5", "lou5gung1", "go3", "do2", "VQ1", "gei2si4", "dou2", "jau5dak1", "tai2", "zek1", "gam2", "ni1di1", "VQ2", "zi1", "m4", "zi1", "daai6koi3", "VQ6" ]
[ 18, 43, 116, 83, 75, 64, 50, 62, 50, 83, 64, 46, 81, 75, 116, 64, 64, 83, 75, 21, 75, 21, 83 ]
[ 2, 1, 6, 0, 5, 10, 14, 14, 14, 0, 10, 9, 8, 5, 6, 10, 10, 0, 5, 4, 5, 4, 0 ]
"TN001-DR300497-NGO5M"
"B"
94
[ "我", "唔", "知", "𡃉", "。", "我", "唔", "知", "個", "月份", "係", "幾時", "啊", "。", "但", "我", "知", "有", "哩啲", "嘢", "睇", "。" ]
[ "ngo5", "m4", "zi1", "gaa3", "VQ1", "ngo5", "m4", "zi1", "go3", "jyut6fan6", "hai6", "gei2si4", "aa3", "VQ1", "daan6", "ngo5", "zi1", "jau5", "ni1di1", "je5", "tai2", "VQ1" ]
[ 64, 21, 75, 116, 83, 64, 21, 75, 62, 50, 75, 64, 116, 83, 18, 64, 75, 76, 64, 50, 75, 83 ]
[ 10, 4, 5, 6, 0, 10, 4, 5, 14, 14, 5, 10, 6, 0, 2, 10, 5, 5, 10, 14, 5, 0 ]
"TN001-DR300497-HAI6M"
"A"
95
[ "係", "咩", "?", "我", "又", "唔係", "好", "知", "喎", "。" ]
[ "hai6", "me1", "VQ6", "ngo5", "jau6", "m4hai6", "hou2", "zi1", "wo3", "VQ1" ]
[ 75, 116, 83, 64, 21, 75, 21, 75, 116, 83 ]
[ 5, 6, 0, 10, 4, 5, 4, 5, 6, 0 ]
"TN001-DR300497-DAAN6"
"B"
96
[ "但係", "睇", "好似", "如果", "照", "哩啲", "報紙", "噉", "影", "出來", "," ]
[ "daan6hai6", "tai2", "hou2ci5", "jyu4gwo2", "ziu3", "ni1di1", "bou3zi2", "gam2", "jing2", "ceot1lai4", "VQ2" ]
[ 18, 75, 75, 18, 75, 64, 50, 64, 75, 75, 83 ]
[ 2, 5, 5, 2, 5, 10, 14, 10, 5, 5, 0 ]
"TN001-DR300497-JING1"
"B"
97
[ "應該", "係", "夏天", "噉", "嘅", "氣候", ",", "係", "喇", "。" ]
[ "jing1goi1", "hai6", "haa6tin1", "gam2", "ge3", "hei3hau6", "VQ2", "hai6", "laa1", "VQ1" ]
[ 81, 75, 68, 64, 72, 50, 83, 75, 116, 83 ]
[ 8, 5, 4, 10, 6, 14, 0, 5, 6, 0 ]
"TN001-DR300497-GWAI3"
"A"
98
[ "季節", "。" ]
[ "gwai3zit3", "VQ1" ]
[ 50, 83 ]
[ 14, 0 ]
"TN001-DR300497-DOU1J"
"A"
99
[ "都", "應該", "係", "喇", "。", "你", "喺", "個", "海", "嗰度", "你", "梗係", "着", "泳褲", "喇", ",", "唔係", "着", "乜嘢", "啊", "。" ]
[ "dou1", "jing1goi1", "hai6", "laa1", "VQ1", "nei5", "hai2", "go3", "hoi2", "go2dou6", "nei5", "gang2hai6", "zoek3", "wing6fu3", "laa1", "VQ2", "m4hai6", "zoek3", "mat1je5", "aa3", "VQ1" ]
[ 21, 81, 75, 116, 83, 64, 60, 62, 50, 64, 64, 21, 75, 50, 116, 83, 75, 75, 64, 116, 83 ]
[ 4, 8, 5, 6, 0, 10, 3, 14, 14, 10, 10, 4, 5, 14, 6, 0, 5, 5, 10, 6, 0 ]

Dataset Card for The Hong Kong Cantonese Corpus (HKCanCor)

Dataset Summary

The Hong Kong Cantonese Corpus (HKCanCor) comprises transcribed conversations recorded between March 1997 and August 1998. It contains recordings of spontaneous speech (51 texts) and radio programmes (42 texts), which involve 2 to 4 speakers, with 1 text of monologue.

In total, the corpus contains around 230,000 Chinese words. The text is word-segmented (i.e., tokenization is at word-level, and each token can span multiple Chinese characters). Tokens are annotated with part-of-speech (POS) tags and romanised Cantonese pronunciation.

  • Romanisation
    • Follows conventions set by the Linguistic Society of Hong Kong (LSHK).
  • POS
    • The tagset used by this corpus extends the one in the Peita-Fujitsu-Renmin Ribao (PRF) corpus (Duan et al., 2000). Extensions were made to further capture Cantonese-specific phenomena.
    • To facilitate everyday usage and for better comparability across languages and/or corpora, this dataset also includes the tags mapped to the Universal Dependencies 2.0 format. This mapping references the PyCantonese library.

Supported Tasks and Leaderboards

[More Information Needed]

Languages

Yue Chinese / Cantonese (Hong Kong).

Dataset Structure

This corpus has 10801 utterances and approximately 230000 Chinese words. There is no predefined split.

Data Instances

Each instance contains a conversation id, speaker id within that conversation, turn number, part-of-speech tag for each Chinese word in the PRF format and UD2.0 format, and the utterance written in Chinese characters as well as its LSHK format romanisation.

For example:

{
    'conversation_id': 'TNR016-DR070398-HAI6V',
    'pos_tags_prf': ['v', 'w'], 
    'pos_tags_ud': ['VERB', 'PUNCT'],
    'speaker': 'B', 
    'transcriptions': ['hai6', 'VQ1'], 
    'turn_number': 112, 
    'tokens': ['係', '。']
}

Data Fields

  • conversation_id: unique dialogue-level id
  • pos_tags_prf: POS tag using the PRF format at token-level
  • pos_tags_ud: POS tag using the UD2.0 format at token-level
  • speaker: unique speaker id within dialogue
  • transcriptions: token-level romanisation in the LSHK format
  • turn_number: turn number in dialogue
  • tokens: Chinese word or punctuation at token-level

Data Splits

There are no specified splits in this dataset.

Dataset Creation

Curation Rationale

[More Information Needed]

Source Data

Initial Data Collection and Normalization

[More Information Needed]

Who are the source language producers?

[More Information Needed]

Annotations

Annotation process

[More Information Needed]

Who are the annotators?

[More Information Needed]

Personal and Sensitive Information

[More Information Needed]

Considerations for Using the Data

Social Impact of Dataset

[More Information Needed]

Discussion of Biases

[More Information Needed]

Other Known Limitations

[More Information Needed]

Additional Information

Dataset Curators

[More Information Needed]

Licensing Information

This work is licensed under a Creative Commons Attribution 4.0 International License.

Citation Information

This corpus was developed by Luke and Wong, 2015.

@article{luke2015hong,
  author={Luke, Kang-Kwong and Wong, May LY},
  title={The Hong Kong Cantonese corpus: design and uses},
  journal={Journal of Chinese Linguistics},
  year={2015},
  pages={309-330},
  month={12}
}

The mapping from the corpus POS tagset to the Universal Dependencies tagset is provided by Jackson Lee, as a part of the PyCantonese library.

@misc{lee2020,
  author = {Lee, Jackson},
  title = {PyCantonese: Cantonese Linguistics and NLP in Python},
  year = {2020},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/jacksonllee/pycantonese}},
  commit = {1d58f44e1cb097faa69de6b617e1d28903b84b98}
}

Contributions

Thanks to @j-chim for adding this dataset.

Downloads last month
476