Datasets:
conversation_id
string
| speaker
string
| turn_number
int16
0
523
| tokens
sequence
| transcriptions
sequence
| pos_tags_prf
sequence
| pos_tags_ud
sequence
|
---|---|---|---|---|---|---|
"TN001-DR300497-WAI3C" | "A" | 0 | [
"喂",
"遲",
"啲",
"去",
"唔",
"去",
"旅行",
"啊",
"?",
"你",
"老公",
"有冇",
"平",
"機票",
"啊",
"?"
] | [
"wai3",
"ci4",
"di1",
"heoi3",
"m4",
"heoi3",
"leoi5hang4",
"aa3",
"VQ6",
"nei5",
"lou5gung1",
"jau5mou5",
"peng4",
"gei1piu3",
"aa3",
"VQ6"
] | [
24,
9,
72,
75,
21,
75,
80,
116,
83,
64,
50,
76,
9,
50,
116,
83
] | [
7,
11,
6,
5,
4,
5,
14,
6,
0,
10,
14,
5,
11,
14,
6,
0
] |
"TN001-DR300497-PENG4" | "B" | 1 | [
"平",
"機票",
"要",
"淡季",
"先",
"有得",
"平",
"𡃉",
"喎",
"。",
"而家",
"旺",
"-",
"。"
] | [
"peng4",
"gei1piu3",
"jiu3",
"daam6gwai3",
"sin1",
"jau5dak1",
"peng4",
"gaa3",
"wo3",
"VQ1",
"ji4gaa1",
"wong6",
"VQ2",
"VQ1"
] | [
9,
50,
81,
13,
21,
81,
9,
116,
116,
83,
68,
9,
83,
83
] | [
11,
14,
8,
14,
4,
8,
11,
6,
6,
0,
4,
11,
0,
0
] |
"TN001-DR300497-MOU5D" | "A" | 2 | [
"冇得",
"去",
"嗱",
"。"
] | [
"mou5dak1",
"heoi3",
"laa4",
"VQ1"
] | [
81,
75,
116,
83
] | [
8,
5,
6,
0
] |
"TN001-DR300497-HAA5V" | "B" | 3 | [
"吓",
"?"
] | [
"haa5",
"VQ6"
] | [
24,
83
] | [
7,
0
] |
"TN001-DR300497-JI4GA" | "B" | 4 | [
"而家",
"旺季",
"。",
"通常",
"都",
"係",
"貴",
"𡃉",
"喎",
",",
"啲",
"機票",
"。"
] | [
"ji4gaa1",
"wong6gwai3",
"VQ1",
"tung1soeng4",
"dou1",
"hai6",
"gwai3",
"gaa3",
"wo3",
"VQ2",
"di1",
"gei1piu3",
"VQ1"
] | [
68,
13,
83,
21,
21,
75,
9,
116,
116,
83,
62,
50,
83
] | [
4,
14,
0,
4,
4,
5,
11,
6,
6,
0,
14,
14,
0
] |
"TN001-DR300497-HAI6M" | "A" | 5 | [
"係",
"咩",
"?",
"我",
"聽",
"朋友",
"講",
"話",
"去",
",",
"誒",
",",
"Orlando",
"嗰個",
"舊",
"-",
"嗰個",
"迪士尼",
"呢",
",",
"廿五",
"週年",
"喎",
"。",
"好",
"抵",
"玩",
"喎",
"。",
"直程",
"可以",
"喺",
"裏邊",
"嗰啲",
"酒店",
"住",
"𠻹",
"喎",
",",
"佢",
"話",
"。"
] | [
"hai6",
"me1",
"VQ6",
"ngo5",
"teng1",
"pang4jau5",
"gong2",
"waa6",
"heoi3",
"VQ2",
"e6",
"VQ2",
"Orlando0",
"go2go3",
"gau6",
"VQ2",
"go2go3",
"dik6si6nei4",
"ne1",
"VQ2",
"jaa6ng5",
"zau1nin4",
"wo3",
"VQ1",
"hou2",
"dai2",
"waan2",
"wo3",
"VQ1",
"zik6cing4",
"ho2ji5",
"hai2",
"leoi5bin6",
"go2di1",
"zau2dim3",
"zyu6",
"tim1",
"wo3",
"VQ2",
"keoi5",
"waa6",
"VQ1"
] | [
75,
116,
83,
64,
75,
50,
75,
75,
75,
83,
24,
83,
102,
64,
9,
83,
64,
56,
118,
83,
46,
62,
116,
83,
21,
9,
75,
116,
83,
21,
81,
60,
26,
64,
50,
75,
116,
116,
83,
64,
75,
83
] | [
5,
6,
0,
10,
5,
14,
5,
5,
5,
0,
7,
0,
15,
10,
11,
0,
10,
15,
6,
0,
9,
14,
6,
0,
4,
11,
5,
6,
0,
4,
8,
3,
4,
10,
14,
5,
6,
6,
0,
10,
5,
0
] |
"TN001-DR300497-DAAN6" | "B" | 6 | [
"但",
"你哋",
"講",
"嗰啲",
"係",
","
] | [
"daan6",
"nei5dei6",
"gong2",
"go2di1",
"hai6",
"VQ2"
] | [
18,
64,
75,
64,
75,
83
] | [
2,
10,
5,
10,
5,
0
] |
"TN001-DR300497-GAN1L" | "A" | 7 | [
"跟",
"旅行社",
"去",
"囖",
"。"
] | [
"gan1",
"leoi5hang4se5",
"heoi3",
"lo1",
"VQ1"
] | [
75,
50,
75,
116,
83
] | [
5,
14,
5,
6,
0
] |
"TN001-DR300497-HIGH0" | "B" | 8 | [
"High",
"season",
"去",
"𡃉",
"喎",
"。"
] | [
"high0",
"season0",
"heoi3",
"gaa3",
"wo3",
"VQ1"
] | [
85,
99,
75,
116,
116,
83
] | [
11,
14,
5,
6,
6,
0
] |
"TN001-DR300497-CAT1B" | "A" | 9 | [
"七",
"八月",
"嗰陣時",
"囖",
"。",
"但係",
"都",
"幾",
"貴",
"喎",
"。",
"都",
"要",
"成",
"萬四",
"蚊",
",",
"四",
"五",
"日",
"。",
"淨係",
"去",
"Orlando",
"嗰度",
"玩",
"咋",
"喎",
"。",
"冇",
"嘢",
"做",
"。"
] | [
"cat1",
"baat3jyut6",
"go2zan6si4",
"lo1",
"VQ1",
"daan6hai6",
"dou1",
"gei2",
"gwai3",
"wo3",
"VQ1",
"dou1",
"jiu3",
"seng4",
"maan6sei3",
"man1",
"VQ2",
"sei3",
"ng5",
"jat6",
"VQ1",
"zing6hai6",
"heoi3",
"Orlando0",
"go2dou6",
"waan2",
"zaa3",
"wo3",
"VQ1",
"mou5",
"je5",
"zou6",
"VQ1"
] | [
46,
68,
64,
116,
83,
18,
21,
21,
9,
116,
83,
21,
81,
46,
46,
62,
83,
46,
46,
62,
83,
21,
75,
102,
64,
75,
116,
116,
83,
76,
50,
75,
83
] | [
9,
4,
10,
6,
0,
2,
4,
4,
11,
6,
0,
4,
8,
9,
9,
14,
0,
9,
9,
14,
0,
4,
5,
15,
10,
5,
6,
6,
0,
5,
14,
5,
0
] |
"TN001-DR300497-GAM2M" | "B" | 10 | [
"噉",
"咪",
"食",
"同",
"玩",
"咪",
"啱",
"。",
"但係",
"我",
"-"
] | [
"gam2",
"mai6",
"sik6",
"tung4",
"waan2",
"mai6",
"aam1",
"VQ1",
"daan6hai6",
"ngo5",
"VQ2"
] | [
64,
21,
75,
18,
75,
21,
9,
83,
18,
64,
83
] | [
10,
4,
5,
2,
5,
4,
11,
0,
2,
10,
0
] |
"TN001-DR300497-HAI6L" | "A" | 11 | [
"係",
"裏邊",
"食",
"囖",
",",
"裏邊",
"冇",
"乜嘢",
"食",
"𡃉",
"咋",
"喎",
"。",
"係",
"唔係",
"啊",
"?",
"你",
"都",
"去",
"過",
"喇",
"。"
] | [
"hai6",
"leoi5bin6",
"sik6",
"lo1",
"VQ2",
"leoi5bin6",
"mou5",
"mat1je5",
"sik6",
"gaa3",
"zaa3",
"wo3",
"VQ1",
"hai6",
"m4hai6",
"aa3",
"VQ6",
"nei5",
"dou1",
"heoi3",
"gwo3",
"laa1",
"VQ1"
] | [
75,
26,
75,
116,
83,
26,
76,
64,
75,
116,
116,
116,
83,
75,
75,
116,
83,
64,
21,
75,
72,
116,
83
] | [
5,
4,
5,
6,
0,
4,
5,
10,
5,
6,
6,
6,
0,
5,
5,
6,
0,
10,
4,
5,
6,
6,
0
] |
"TN001-DR300497-HAI6S" | "B" | 12 | [
"係",
"食",
"嗰啲",
"乜嘢",
"漢堡包",
","
] | [
"hai6",
"sik6",
"go2di1",
"mat1je5",
"hon3bou2baau1",
"VQ2"
] | [
75,
75,
64,
64,
50,
83
] | [
5,
5,
10,
10,
14,
0
] |
"TN001-DR300497-MAI6H" | "A" | 13 | [
"咪",
"係",
"囖",
"。"
] | [
"mai6",
"hai6",
"lo1",
"VQ1"
] | [
18,
75,
116,
83
] | [
2,
5,
6,
0
] |
"TN001-DR300497-SYU4T" | "B" | 14 | [
"薯條",
"𡃉",
"咋",
"。"
] | [
"syu4tiu2",
"gaa3",
"zaa3",
"VQ1"
] | [
50,
116,
116,
83
] | [
14,
6,
6,
0
] |
"TN001-DR300497-WAAN2" | "A" | 15 | [
"玩",
"嗰啲",
",",
"玩",
"乜嘢",
"啊",
"?",
"Magic",
"Kingdom",
"嗰啲",
"囖",
"。",
"係",
"唔係",
"啊",
"?"
] | [
"waan2",
"go2di1",
"VQ2",
"waan2",
"mat1je5",
"aa3",
"VQ6",
"Magic0",
"Kingdom0",
"go2di1",
"lo1",
"VQ1",
"hai6",
"m4hai6",
"aa3",
"VQ6"
] | [
75,
64,
83,
75,
64,
116,
83,
84,
99,
64,
116,
83,
75,
75,
116,
83
] | [
5,
10,
0,
5,
10,
6,
0,
1,
14,
10,
6,
0,
5,
5,
6,
0
] |
"TN001-DR300497-WAAN2" | "B" | 16 | [
"玩",
"就",
",",
"多",
"嘢",
"睇",
"多",
"嘢",
"玩",
"。"
] | [
"waan2",
"zau6",
"VQ2",
"do1",
"je5",
"tai2",
"do1",
"je5",
"waan2",
"VQ1"
] | [
75,
21,
83,
9,
50,
75,
9,
50,
75,
83
] | [
5,
4,
0,
11,
14,
5,
11,
14,
5,
0
] |
"TN001-DR300497-BAT1G" | "A" | 17 | [
"不過",
"幾",
"日",
"都",
"好",
"悶",
"啫",
",",
"喺",
"晒",
"裏邊",
"。"
] | [
"bat1gwo3",
"gei2",
"jat6",
"dou1",
"hou2",
"mun6",
"ze1",
"VQ2",
"hai2",
"saai3",
"leoi5bin6",
"VQ1"
] | [
18,
46,
62,
21,
21,
9,
116,
83,
75,
72,
26,
83
] | [
2,
9,
14,
4,
4,
11,
6,
0,
5,
6,
4,
0
] |
"TN001-DR300497-M4HAI" | "B" | 18 | [
"唔係",
"喎",
"。",
"佢",
"好",
"大",
"𡃉",
"喎",
"。",
"你",
"一",
"日",
"玩",
"一",
"個",
"場",
"喎",
",",
"真係",
"。"
] | [
"m4hai6",
"wo3",
"VQ1",
"keoi5",
"hou2",
"daai6",
"gaa3",
"wo3",
"VQ1",
"nei5",
"jat1",
"jat6",
"waan2",
"jat1",
"go3",
"coeng4",
"wo3",
"VQ2",
"zan1hai6",
"VQ1"
] | [
75,
116,
83,
64,
21,
9,
116,
116,
83,
64,
46,
62,
75,
46,
62,
50,
116,
83,
21,
83
] | [
5,
6,
0,
10,
4,
11,
6,
6,
0,
10,
9,
14,
5,
9,
14,
14,
6,
0,
4,
0
] |
"TN001-DR300497-HOU2#" | "A" | 19 | [
"好",
"○",
"𡃉",
"。",
"玩",
"咗",
"咁",
"多",
"日",
"。"
] | [
"hou2",
"#",
"gaa3",
"VQ1",
"waan2",
"zo2",
"gam3",
"do1",
"jat6",
"VQ1"
] | [
21,
2,
116,
83,
75,
72,
21,
9,
62,
83
] | [
4,
1,
6,
0,
5,
6,
4,
11,
14,
0
] |
"TN001-DR300497-GAM2M" | "B" | 20 | [
"噉",
"咪",
"瞓覺",
"囖",
"。"
] | [
"gam2",
"mai6",
"fan3gaau3",
"lo1",
"VQ1"
] | [
64,
21,
75,
116,
83
] | [
10,
4,
5,
6,
0
] |
"TN001-DR300497-CI1SI" | "A" | 21 | [
"黐線",
"。",
"搭",
"飛機",
"去",
"過",
"嗰邊",
"瞓覺",
"呀",
"?"
] | [
"ci1sin3",
"VQ1",
"daap3",
"fei1gei1",
"heoi3",
"gwo3",
"go2bin1",
"fan3gaau3",
"aa4",
"VQ6"
] | [
44,
83,
75,
50,
79,
75,
64,
75,
116,
83
] | [
1,
0,
5,
14,
5,
5,
10,
5,
6,
0
] |
"TN001-DR300497-GAM2F" | "B" | 22 | [
"噉",
"放假",
"係",
"relax",
"。",
"係",
"噉",
"𡃉",
"喇",
"。"
] | [
"gam2",
"fong3gaa3",
"hai6",
"relax0",
"VQ1",
"hai6",
"gam2",
"gaa3",
"laa1",
"VQ1"
] | [
18,
75,
75,
112,
83,
75,
64,
116,
116,
83
] | [
2,
5,
5,
5,
0,
5,
10,
6,
6,
0
] |
"TN001-DR300497-BAT1G" | "A" | 23 | [
"不過",
"都",
",",
"不過",
"真係",
"好",
"大",
"裏邊",
"。",
"佢",
"直程",
"可以",
"俾",
"餅",
"帶",
"你",
"呢",
"。",
"借",
"俾",
"你",
"睇",
"喎",
",",
"睇",
"下",
"裏面",
"有",
"啲",
"乜嘢",
"酒店",
"啊",
",",
"同埋",
"有",
"啲",
"乜嘢",
"嘢",
"玩",
"嚹",
"。",
"直程",
"可以",
"租",
",",
"借",
"餅",
"帶",
"俾",
"你",
"返",
"屋企",
"睇",
"喎",
"。"
] | [
"bat1gwo3",
"dou1",
"VQ2",
"bat1gwo3",
"zan1hai6",
"hou2",
"daai6",
"leoi5bin6",
"VQ1",
"keoi5",
"zik6cing4",
"ho2ji5",
"bei2",
"beng2",
"daai2",
"nei5",
"ne1",
"VQ1",
"ze3",
"bei2",
"nei5",
"tai2",
"wo3",
"VQ2",
"tai2",
"haa5",
"leoi5min6",
"jau5",
"di1",
"mat1je5",
"zau2dim3",
"aa3",
"VQ2",
"tung4maai4",
"jau5",
"di1",
"mat1je5",
"je5",
"waan2",
"laa3",
"VQ1",
"zik6cing4",
"ho2ji5",
"zou1",
"VQ2",
"ze3",
"beng2",
"daai2",
"bei2",
"nei5",
"faan1",
"uk1kei2",
"tai2",
"wo3",
"VQ1"
] | [
18,
21,
83,
18,
21,
21,
9,
26,
83,
64,
21,
81,
75,
62,
50,
64,
118,
83,
75,
60,
64,
75,
118,
83,
75,
72,
26,
76,
62,
64,
50,
116,
83,
18,
76,
62,
64,
50,
75,
116,
83,
21,
81,
75,
83,
75,
62,
50,
60,
64,
75,
50,
75,
116,
83
] | [
2,
4,
0,
2,
4,
4,
11,
4,
0,
10,
4,
8,
5,
14,
14,
10,
6,
0,
5,
3,
10,
5,
6,
0,
5,
6,
4,
5,
14,
10,
14,
6,
0,
2,
5,
14,
10,
14,
5,
6,
0,
4,
8,
5,
0,
5,
14,
14,
3,
10,
5,
14,
5,
6,
0
] |
"TN001-DR300497-GAM2A" | "B" | 24 | [
"噉",
"呀",
",",
"我",
"好",
"啲",
"喎",
"。"
] | [
"gam2",
"aa4",
"VQ2",
"ngo5",
"hou2",
"di1",
"wo3",
"VQ1"
] | [
18,
116,
83,
64,
9,
72,
116,
83
] | [
2,
6,
0,
10,
11,
6,
6,
0
] |
"TN001-DR300497-DIM2G" | "A" | 25 | [
"點解",
"啊",
"?"
] | [
"dim2gaai2",
"aa3",
"VQ6"
] | [
64,
116,
83
] | [
10,
6,
0
] |
"TN001-DR300497-JAN1W" | "B" | 26 | [
"因為",
"我",
"老公",
"做",
"哩",
"行",
"。",
"我",
"可以",
"唔使",
"話",
"。",
"乜嘢",
"都",
"問",
"佢",
"。",
"我",
"唔使",
"記",
"。"
] | [
"jan1wai6",
"ngo5",
"lou5gung1",
"zou6",
"ni1",
"hong4",
"VQ1",
"ngo5",
"ho2ji5",
"m4sai2",
"waa6",
"VQ1",
"mat1je5",
"dou1",
"man6",
"keoi5",
"VQ1",
"ngo5",
"m4sai2",
"gei3",
"VQ1"
] | [
18,
64,
50,
75,
64,
50,
83,
64,
81,
81,
75,
83,
64,
21,
75,
64,
83,
64,
81,
75,
83
] | [
2,
10,
14,
5,
10,
14,
0,
10,
8,
8,
5,
0,
10,
4,
5,
10,
0,
10,
8,
5,
0
] |
"TN001-DR300497-HEOI3" | "A" | 27 | [
"去",
"過",
"幾",
"次",
"唧",
",",
"你",
"老公",
"Orlando",
"嗰度",
"?"
] | [
"heoi3",
"gwo3",
"gei2",
"ci3",
"zek1",
"VQ2",
"nei5",
"lou5gung1",
"Orlando0",
"go2dou6",
"VQ6"
] | [
75,
72,
64,
62,
116,
83,
64,
50,
102,
64,
83
] | [
5,
6,
10,
14,
6,
0,
10,
14,
15,
10,
0
] |
"TN001-DR300497-NGO5M" | "B" | 28 | [
"我",
"唔",
"知",
"𡃉",
",",
"噉",
"平",
"咪",
"去",
"囖",
"。"
] | [
"ngo5",
"m4",
"zi1",
"gaa3",
"VQ2",
"gam2",
"peng4",
"mai6",
"heoi3",
"lo1",
"VQ1"
] | [
64,
21,
75,
116,
83,
18,
9,
21,
75,
116,
83
] | [
10,
4,
5,
6,
0,
2,
11,
4,
5,
6,
0
] |
"TN001-DR300497-HAA2V" | "A" | 29 | [
"吓",
"?"
] | [
"haa2",
"VQ6"
] | [
24,
83
] | [
7,
0
] |
"TN001-DR300497-ZIK1H" | "B" | 30 | [
"即係",
"不過",
"要",
"係",
"淡季",
"嘅",
"時候",
"呢",
"。",
"就",
"唔",
"唔",
"-",
",",
"一定",
"唔",
"會",
"七",
"八月",
"𡃉",
"嚹",
"。"
] | [
"zik1hai6",
"bat1gwo3",
"jiu3",
"hai6",
"daam6gwai3",
"ge3",
"si4hau6",
"ne1",
"VQ1",
"zau6",
"m4",
"m4",
"VQ2",
"VQ2",
"jat1ding6",
"m4",
"wui5",
"cat1",
"baat3jyut6",
"gaa3",
"laa3",
"VQ1"
] | [
21,
18,
81,
75,
13,
72,
50,
116,
83,
21,
21,
21,
83,
83,
21,
21,
81,
46,
68,
116,
116,
83
] | [
4,
2,
8,
5,
14,
6,
14,
6,
0,
4,
4,
4,
0,
0,
4,
4,
8,
9,
4,
6,
6,
0
] |
"TN001-DR300497-HAA2V" | "A" | 31 | [
"吓",
",",
"咪",
"重",
"好",
"。",
"唔使",
"咁",
"多",
"人",
"排長",
"-",
",",
"排隊",
"。"
] | [
"haa2",
"VQ2",
"mai6",
"zung6",
"hou2",
"VQ1",
"m4sai2",
"gam3",
"do1",
"jan4",
"paai4coeng4",
"VQ2",
"VQ2",
"paai4deoi2",
"VQ1"
] | [
24,
83,
21,
21,
9,
83,
75,
21,
9,
50,
75,
83,
83,
75,
83
] | [
7,
0,
4,
4,
11,
0,
5,
4,
11,
14,
5,
0,
0,
5,
0
] |
"TN001-DR300497-GAM2H" | "B" | 32 | [
"噉",
"係",
"啊",
"。",
"但係",
"要",
"就",
"啱",
"佢",
"啲",
"假期",
"囖",
"。"
] | [
"gam2",
"hai6",
"aa3",
"VQ1",
"daan6hai6",
"jiu3",
"zau6",
"aam1",
"keoi5",
"di1",
"gaa3kei4",
"lo1",
"VQ1"
] | [
18,
75,
116,
83,
18,
81,
75,
9,
64,
62,
50,
116,
83
] | [
2,
5,
6,
0,
2,
8,
5,
11,
10,
14,
14,
6,
0
] |
"TN001-DR300497-NGO5S" | "A" | 33 | [
"我",
"上次",
"去",
",",
"七",
"八月",
"嗰陣時",
"去",
"呢",
",",
"排",
"一",
"笪",
"地方",
"玩",
"啊",
"都",
"成",
"半",
"粒",
"鐘",
"啊"
] | [
"ngo5",
"soeng6ci3",
"heoi3",
"VQ2",
"cat1",
"baat3jyut6",
"go2zan6si4",
"heoi3",
"ne1",
"VQ2",
"paai4",
"jat1",
"daat3",
"dei6fong1",
"waan2",
"aa3",
"dou1",
"seng4",
"bun3",
"lap1",
"zung1",
"aa3"
] | [
64,
68,
75,
83,
46,
68,
64,
75,
118,
83,
78,
46,
62,
50,
75,
116,
21,
46,
46,
62,
50,
116
] | [
10,
4,
5,
0,
9,
4,
10,
5,
6,
0,
5,
9,
14,
14,
5,
6,
4,
9,
9,
14,
14,
6
] |
"TN001-DR300497-CAA1M" | "B" | 34 | [
"差唔多",
"咋",
"我",
"上次",
"去",
"都",
"係",
"。",
"淡季",
"去",
"都",
"要",
"啊",
"。"
] | [
"caa1m4do1",
"zaa3",
"ngo5",
"soeng6ci3",
"heoi3",
"dou1",
"hai6",
"VQ1",
"daam6gwai3",
"heoi3",
"dou1",
"jiu3",
"aa3",
"VQ1"
] | [
21,
116,
64,
68,
75,
21,
75,
83,
13,
75,
21,
81,
116,
83
] | [
4,
6,
10,
4,
5,
4,
5,
0,
14,
5,
4,
8,
6,
0
] |
"TN001-DR300497-DAAM6" | "A" | 35 | [
"淡季",
"去",
"你",
"幾時",
"啊",
"?"
] | [
"daam6gwai3",
"heoi3",
"nei5",
"gei2si4",
"aa3",
"VQ6"
] | [
13,
75,
64,
64,
116,
83
] | [
14,
5,
10,
10,
6,
0
] |
"TN001-DR300497-GAU2J" | "B" | 36 | [
"九月",
"中",
"。"
] | [
"gau2jyut6",
"zung1",
"VQ1"
] | [
68,
26,
83
] | [
4,
4,
0
] |
"TN001-DR300497-HAI6L" | "A" | 37 | [
"係",
"囖",
"。",
"九月",
"中",
"都",
"咁",
"多",
"人",
"去",
"呀",
"。"
] | [
"hai6",
"lo1",
"VQ1",
"gau2jyut6",
"zung1",
"dou1",
"gam3",
"do1",
"jan4",
"heoi3",
"aa4",
"VQ1"
] | [
75,
116,
83,
68,
26,
21,
21,
9,
50,
75,
116,
83
] | [
5,
6,
0,
4,
4,
4,
4,
11,
14,
5,
6,
0
] |
"TN001-DR300497-HAI6A" | "B" | 38 | [
"係",
"啊",
"。"
] | [
"hai6",
"aa3",
"VQ1"
] | [
75,
116,
83
] | [
5,
6,
0
] |
"TN001-DR300497-GO2BI" | "A" | 39 | [
"嗰邊",
"都",
"有",
"人",
"未",
"放",
"完",
"暑假",
"𡃉",
"咩",
"?"
] | [
"go2bin1",
"dou1",
"jau5",
"jan4",
"mei6",
"fong3",
"jyun4",
"syu2gaa3",
"gaa3",
"me1",
"VQ6"
] | [
64,
21,
76,
50,
21,
78,
72,
50,
116,
116,
83
] | [
10,
4,
5,
14,
4,
5,
6,
14,
6,
6,
0
] |
"TN001-DR300497-M4CIN" | "B" | 40 | [
"唔",
"清楚",
"。",
"但係",
"有啲",
"-",
"有啲",
"遊戲",
"係",
"要",
"排",
"咁",
"耐",
","
] | [
"m4",
"cing1co2",
"VQ1",
"daan6hai6",
"jau5di1",
"VQ2",
"jau5di1",
"jau4hei3",
"hai6",
"jiu3",
"paai4",
"gam3",
"noi6",
"VQ2"
] | [
21,
9,
83,
18,
64,
83,
64,
50,
75,
81,
78,
21,
9,
83
] | [
4,
11,
0,
2,
10,
0,
10,
14,
5,
8,
5,
4,
11,
0
] |
"TN001-DR300497-ZIK1H" | "A" | 41 | [
"即係",
"好多"
] | [
"zik1hai6",
"hou2do1"
] | [
21,
46
] | [
4,
9
] |
"TN001-DR300497-DAAN6" | "B" | 42 | [
"但係",
"有啲",
"唔使",
"。",
"有啲",
"好",
"快",
"。"
] | [
"daan6hai6",
"jau5di1",
"m4sai2",
"VQ1",
"jau5di1",
"hou2",
"faai3",
"VQ1"
] | [
18,
64,
81,
83,
64,
21,
9,
83
] | [
2,
10,
8,
0,
10,
4,
11,
0
] |
"TN001-DR300497-HAI6M" | "A" | 43 | [
"係",
"咩",
"?"
] | [
"hai6",
"me1",
"VQ6"
] | [
75,
116,
83
] | [
5,
6,
0
] |
"TN001-DR300497-DAAN6" | "B" | 44 | [
"但係",
"都",
"唔係",
"好",
"迫人",
"囖",
"。",
"變",
"咗",
"你",
"可以",
"睇",
"嘢",
",",
"睇",
"煙花",
"嗰啲",
"呢",
"就",
"唔使",
"咁",
"迫",
"。"
] | [
"daan6hai6",
"dou1",
"m4hai6",
"hou2",
"bik1jan4",
"lo1",
"VQ1",
"bin3",
"zo2",
"nei5",
"ho2ji5",
"tai2",
"je5",
"VQ2",
"tai2",
"jin1faa1",
"go2di1",
"ne1",
"zau6",
"m4sai2",
"gam3",
"bik1",
"VQ1"
] | [
18,
21,
75,
21,
9,
116,
83,
75,
72,
64,
81,
75,
50,
83,
75,
50,
64,
118,
21,
81,
21,
9,
83
] | [
2,
4,
5,
4,
11,
6,
0,
5,
6,
10,
8,
5,
14,
0,
5,
14,
10,
6,
4,
8,
4,
11,
0
] |
"TN001-DR300497-HAI6L" | "A" | 45 | [
"係",
"囖",
",",
"係",
"囖",
"。",
"好多",
"人",
"嚹",
"我",
"上次",
"。",
"等",
"到",
"頸",
"都",
"長",
"埋",
"啊",
"。",
"來",
"嚹",
",",
"下次",
"去",
"邊",
"嚹",
"?"
] | [
"hai6",
"lo1",
"VQ2",
"hai6",
"lo1",
"VQ1",
"hou2do1",
"jan4",
"laa3",
"ngo5",
"soeng6ci3",
"VQ1",
"dang2",
"dou3",
"geng2",
"dou1",
"coeng4",
"maai4",
"aa3",
"VQ1",
"lai4",
"laa3",
"VQ2",
"haa6ci3",
"heoi3",
"bin1",
"laa3",
"VQ6"
] | [
75,
116,
83,
75,
116,
83,
46,
50,
116,
64,
68,
83,
75,
72,
50,
21,
9,
72,
116,
83,
75,
116,
83,
68,
75,
64,
116,
83
] | [
5,
6,
0,
5,
6,
0,
9,
14,
6,
10,
4,
0,
5,
6,
14,
4,
11,
6,
6,
0,
5,
6,
0,
4,
5,
10,
6,
0
] |
"TN001-DR300497-MEI6D" | "B" | 46 | [
"未",
"定",
"啊",
"。",
"要",
"等",
"我",
"老公",
",",
"睇",
"下",
"佢",
"嗰度",
"有",
"乜嘢",
"平",
"嘅",
",",
"即係",
"平",
"嘅",
"package",
",",
"咪",
"睇",
"下",
"去",
"邊",
"囖",
"。"
] | [
"mei6",
"ding6",
"aa3",
"VQ1",
"jiu3",
"dang2",
"ngo5",
"lou5gung1",
"VQ2",
"tai2",
"haa5",
"keoi5",
"go2dou6",
"jau5",
"mat1je5",
"peng4",
"ge3",
"VQ2",
"zik1hai6",
"peng4",
"ge3",
"package0",
"VQ2",
"mai6",
"tai2",
"haa5",
"heoi3",
"bin1",
"lo1",
"VQ1"
] | [
21,
75,
116,
83,
81,
75,
64,
50,
83,
75,
72,
64,
64,
76,
64,
9,
72,
83,
21,
9,
72,
99,
83,
21,
75,
72,
75,
64,
116,
83
] | [
4,
5,
6,
0,
8,
5,
10,
14,
0,
5,
6,
10,
10,
5,
10,
11,
6,
0,
4,
11,
6,
14,
0,
4,
5,
6,
5,
10,
6,
0
] |
"TN001-DR300497-NEI5S" | "A" | 47 | [
"你",
"想",
"去",
"邊",
"喇",
"噉樣",
"?"
] | [
"nei5",
"soeng2",
"heoi3",
"bin1",
"laa1",
"gam2joeng2",
"VQ6"
] | [
64,
81,
75,
64,
116,
64,
83
] | [
10,
8,
5,
10,
6,
10,
0
] |
"TN001-DR300497-DAAN6" | "B" | 48 | [
"但係",
"我",
"最",
"想",
"去",
"澳洲",
"紐西蘭",
"嗰邊",
",",
"因為",
"都",
"未",
"去",
"過",
"。"
] | [
"daan6hai6",
"ngo5",
"zeoi3",
"soeng2",
"heoi3",
"ou3zau1",
"nau5sai1laan4",
"go2bin1",
"VQ2",
"jan1wai6",
"dou1",
"mei6",
"heoi3",
"gwo3",
"VQ1"
] | [
18,
64,
21,
81,
75,
54,
54,
64,
83,
18,
21,
21,
75,
72,
83
] | [
2,
10,
4,
8,
5,
15,
15,
10,
0,
2,
4,
4,
5,
6,
0
] |
"TN001-DR300497-HAI6A" | "A" | 49 | [
"係",
"呀",
"?",
"澳洲",
"你",
"都",
"未",
"去",
"過",
"咩",
"?"
] | [
"hai6",
"aa4",
"VQ6",
"ou3zau1",
"nei5",
"dou1",
"mei6",
"heoi3",
"gwo3",
"me1",
"VQ6"
] | [
75,
116,
83,
54,
64,
21,
21,
75,
72,
116,
83
] | [
5,
6,
0,
15,
10,
4,
4,
5,
6,
6,
0
] |
"TN001-DR300497-M6VQ2" | "B" | 50 | [
"嗯",
",",
"未",
"啊",
"。"
] | [
"m6",
"VQ2",
"mei6",
"aa3",
"VQ1"
] | [
24,
83,
21,
116,
83
] | [
7,
0,
4,
6,
0
] |
"TN001-DR300497-O3VQ2" | "A" | 51 | [
"哦",
",",
"我",
"以為",
"淨係",
"未",
",",
"紐西蘭",
"未",
"去",
"過",
"𠻹",
"。",
"不過",
"我",
"都",
"想",
"去",
"紐西蘭",
"。"
] | [
"o3",
"VQ2",
"ngo5",
"ji5wai4",
"zing6hai6",
"mei6",
"VQ2",
"nau5sai1laan4",
"mei6",
"heoi3",
"gwo3",
"tim1",
"VQ1",
"bat1gwo3",
"ngo5",
"dou1",
"soeng2",
"heoi3",
"nau5sai1laan4",
"VQ1"
] | [
24,
83,
64,
75,
21,
21,
83,
54,
21,
75,
72,
116,
83,
18,
64,
21,
81,
75,
54,
83
] | [
7,
0,
10,
5,
4,
4,
0,
15,
4,
5,
6,
6,
0,
2,
10,
4,
8,
5,
15,
0
] |
"TN001-DR300497-M6VQ2" | "B" | 52 | [
"嗯",
",",
"咪",
"去",
"親",
"都",
"係",
"棱",
"埋",
"紐西蘭",
"一齊",
"𡃉",
"喇",
"?"
] | [
"m6",
"VQ2",
"mai6",
"heoi3",
"can1",
"dou1",
"hai6",
"lang3",
"maai4",
"nau5sai1laan4",
"jat1cai4",
"gaa3",
"laa1",
"VQ6"
] | [
24,
83,
21,
75,
72,
21,
75,
75,
72,
54,
21,
116,
116,
83
] | [
7,
0,
4,
5,
6,
4,
5,
5,
6,
15,
4,
6,
6,
0
] |
"TN001-DR300497-M4HAI" | "A" | 53 | [
"唔係",
"啊",
"。",
"上次",
"我",
"淨係",
"去",
"澳洲",
"咋",
"。",
"冇",
"去",
"紐西蘭",
"嚹",
"。"
] | [
"m4hai6",
"aa3",
"VQ1",
"soeng6ci3",
"ngo5",
"zing6hai6",
"heoi3",
"ou3zau1",
"zaa3",
"VQ1",
"mou5",
"heoi3",
"nau5sai1laan4",
"laa3",
"VQ1"
] | [
75,
116,
83,
68,
64,
21,
75,
54,
116,
83,
76,
75,
54,
116,
83
] | [
5,
6,
0,
4,
10,
4,
5,
15,
6,
0,
5,
5,
15,
6,
0
] |
"TN001-DR300497-M4HAI" | "B" | 54 | [
"唔係",
"即係",
"嗰頭",
",",
"同埋",
"嗰",
"-",
"嗰邊",
"。",
"噉",
"變",
"咗",
"你",
"係",
"嘞",
",",
"唔使",
"坐",
"咁",
"耐",
"機",
"喇",
",",
"一次過",
"坐",
"晒",
"咯",
"。"
] | [
"m4hai6",
"zik1hai6",
"go2tau4",
"VQ2",
"tung4maai4",
"go2",
"VQ2",
"go2bin1",
"VQ1",
"gam2",
"bin3",
"zo2",
"nei5",
"hai6",
"laak3",
"VQ2",
"m4sai2",
"co5",
"gam3",
"noi6",
"gei1",
"laa1",
"VQ2",
"jat1ci3gwo3",
"co5",
"saai3",
"lok3",
"VQ1"
] | [
75,
21,
64,
83,
18,
64,
83,
64,
83,
18,
75,
72,
64,
75,
116,
83,
81,
75,
21,
9,
52,
84,
83,
21,
75,
72,
116,
83
] | [
5,
4,
10,
0,
2,
10,
0,
10,
0,
2,
5,
6,
10,
5,
6,
0,
8,
5,
4,
11,
14,
1,
0,
4,
5,
6,
6,
0
] |
"TN001-DR300497-M4SAI" | "A" | 55 | [
"唔使",
"分開",
"兩",
"次",
"。",
"不過",
"新西",
"-",
"紐西蘭",
"係",
"唔係",
"都",
"-",
"紐西蘭",
"定",
"新西蘭",
"唧",
"究竟",
"?"
] | [
"m4sai2",
"fan1hoi1",
"loeng5",
"ci3",
"VQ1",
"bat1gwo3",
"san1sai1",
"VQ2",
"nau5sai1laan4",
"hai6",
"m4hai6",
"dou1",
"VQ2",
"nau5sai1laan4",
"ding6",
"san1sai1laan4",
"zek1",
"gau3ging2",
"VQ6"
] | [
81,
75,
46,
62,
83,
18,
55,
83,
54,
75,
75,
21,
83,
54,
18,
54,
116,
21,
83
] | [
8,
5,
9,
14,
0,
2,
15,
0,
15,
5,
5,
4,
0,
15,
2,
15,
6,
4,
0
] |
"TN001-DR300497-E6VQ2" | "B" | 56 | [
"誒",
",",
"有",
"人",
"叫",
"New Zealand",
"即係",
"紐",
",",
"又",
"照",
"譯",
"紐西蘭",
"。",
"有",
"啲",
"叫做",
"新西蘭",
"。",
"其實",
"都",
"係",
"一樣",
",",
"一",
"個",
"地方",
"。"
] | [
"e6",
"VQ2",
"jau5",
"jan4",
"giu3",
"New_Zealand0",
"zik1hai6",
"nau5",
"VQ2",
"jau6",
"ziu3",
"jik6",
"nau5sai1laan4",
"VQ1",
"jau5",
"di1",
"giu3zou6",
"san1sai1laan4",
"VQ1",
"kei4sat6",
"dou1",
"hai6",
"jat1joeng6",
"VQ2",
"jat1",
"go3",
"dei6fong1",
"VQ1"
] | [
24,
83,
76,
50,
75,
102,
21,
84,
83,
21,
77,
75,
54,
83,
76,
62,
75,
54,
83,
21,
21,
75,
9,
83,
46,
62,
50,
83
] | [
7,
0,
5,
14,
5,
15,
4,
1,
0,
4,
4,
5,
15,
0,
5,
14,
5,
15,
0,
4,
4,
5,
11,
0,
9,
14,
14,
0
] |
"TN001-DR300497-HAI6L" | "A" | 57 | [
"係",
"囖",
"。",
"好似",
"幾",
"靚",
"喎",
"啲",
"風景",
",",
"係",
"唔係",
"啊",
"?"
] | [
"hai6",
"lo1",
"VQ1",
"hou2ci5",
"gei2",
"leng3",
"wo3",
"di1",
"fung1ging2",
"VQ2",
"hai6",
"m4hai6",
"aa3",
"VQ6"
] | [
75,
116,
83,
75,
21,
9,
116,
62,
50,
83,
75,
75,
116,
83
] | [
5,
6,
0,
5,
4,
11,
6,
14,
14,
0,
5,
5,
6,
0
] |
"TN001-DR300497-M6VQ2" | "B" | 58 | [
"嗯",
",",
"係",
"啊",
"。"
] | [
"m6",
"VQ2",
"hai6",
"aa3",
"VQ1"
] | [
24,
83,
75,
116,
83
] | [
7,
0,
5,
6,
0
] |
"TN001-DR300497-HAI6L" | "A" | 59 | [
"係",
"囖",
"。"
] | [
"hai6",
"lo1",
"VQ1"
] | [
75,
116,
83
] | [
5,
6,
0
] |
"TN001-DR300497-JAU5H" | "B" | 60 | [
"有",
"海豚",
"定係",
"有",
"鯨魚",
"睇",
"𡃉",
"。",
"有啲",
"觀團",
",",
"可以",
"出海",
"。"
] | [
"jau5",
"hoi2tyun4",
"ding6hai6",
"jau5",
"king4jyu4",
"tai2",
"gaa3",
"VQ1",
"jau5di1",
"gun1tyun4",
"VQ2",
"ho2ji5",
"ceot1hoi2",
"VQ1"
] | [
76,
50,
18,
76,
50,
75,
116,
83,
64,
50,
83,
81,
75,
83
] | [
5,
14,
2,
5,
14,
5,
6,
0,
10,
14,
0,
8,
5,
0
] |
"TN001-DR300497-NEI5D" | "A" | 61 | [
"你",
"都",
"唔",
"游水",
"。"
] | [
"nei5",
"dou1",
"m4",
"jau4seoi2",
"VQ1"
] | [
64,
21,
21,
75,
83
] | [
10,
4,
4,
5,
0
] |
"TN001-DR300497-DAAN6" | "B" | 62 | [
"但係",
"要",
"-",
"唔係",
"啊",
",",
"嗰啲",
"要",
"夾",
"啱",
"時間",
"𡃉",
"。",
"要",
"睇",
"佢",
"個",
"嗰",
"嗰",
"段",
"時間",
"啱",
"呢",
"就",
"有",
"一",
"羣",
"𡃉",
"。"
] | [
"daan6hai6",
"jiu3",
"VQ2",
"m4hai6",
"aa3",
"VQ2",
"go2di1",
"jiu3",
"gaap3",
"aam1",
"si4gaan3",
"gaa3",
"VQ1",
"jiu3",
"tai2",
"keoi5",
"go3",
"go2",
"go2",
"dyun6",
"si4gaan3",
"aam1",
"ne1",
"zau6",
"jau5",
"jat1",
"kwan4",
"gaa3",
"VQ1"
] | [
18,
81,
83,
75,
116,
83,
64,
81,
75,
9,
50,
116,
83,
81,
75,
64,
62,
64,
64,
62,
50,
9,
118,
21,
76,
46,
62,
116,
83
] | [
2,
8,
0,
5,
6,
0,
10,
8,
5,
11,
14,
6,
0,
8,
5,
10,
14,
10,
10,
14,
14,
11,
6,
4,
5,
9,
14,
6,
0
] |
"TN001-DR300497-NI1GO" | "A" | 63 | [
"哩個",
"去",
"-",
"哩個",
"去",
"帕斯",
"咋",
"喎",
"。"
] | [
"ni1go3",
"heoi3",
"VQ2",
"ni1go3",
"heoi3",
"paak3si1",
"zaa3",
"wo3",
"VQ1"
] | [
64,
75,
83,
64,
75,
54,
116,
116,
83
] | [
10,
5,
0,
10,
5,
15,
6,
6,
0
] |
"TN001-DR300497-HAI6L" | "B" | 64 | [
"係",
"囖",
"咪",
"澳洲",
"嗰邊",
"囖",
",",
"Perth",
"。"
] | [
"hai6",
"lo1",
"mai6",
"ou3zau1",
"go2bin1",
"lo1",
"VQ2",
"Perth0",
"VQ1"
] | [
75,
116,
21,
54,
64,
116,
83,
102,
83
] | [
5,
6,
4,
15,
10,
6,
0,
15,
0
] |
"TN001-DR300497-HAI6A" | "A" | 65 | [
"係",
"啊",
",",
"係",
"啊",
"。"
] | [
"hai6",
"aa3",
"VQ2",
"hai6",
"aa3",
"VQ1"
] | [
75,
116,
83,
75,
116,
83
] | [
5,
6,
0,
5,
6,
0
] |
"TN001-DR300497-HAI6L" | "B" | 66 | [
"係",
"囖",
"。"
] | [
"hai6",
"lo1",
"VQ1"
] | [
75,
116,
83
] | [
5,
6,
0
] |
"TN001-DR300497-BAT1G" | "A" | 67 | [
"不過",
"澳洲",
"好似",
"冇",
"乜",
",",
"但係",
"哩個",
"Perth",
"好似",
"好",
","
] | [
"bat1gwo3",
"ou3zau1",
"hou2ci5",
"mou5",
"mat1",
"VQ2",
"daan6hai6",
"ni1go3",
"Perth0",
"hou2ci5",
"hou2",
"VQ2"
] | [
18,
54,
75,
76,
64,
83,
18,
64,
102,
75,
21,
83
] | [
2,
15,
5,
5,
10,
0,
2,
10,
15,
5,
4,
0
] |
"TN001-DR300497-DAAN6" | "B" | 68 | [
"但係",
"你",
"join",
"佢",
"嗰啲",
"tour",
"都",
"-",
"都",
"未必",
"一定",
"包",
"一定",
"睇",
"得",
"到",
"。",
"即係",
"佢",
"嗰個",
"叫做",
"唔",
"知",
"觀",
"鯨",
"團",
"定係",
"乜嘢",
"噉樣",
"𡃉",
"。",
"就",
"唔",
"一定",
"包",
"你",
"睇",
"到",
"囖",
"。",
"即係",
"佢",
"會",
"有",
"-",
"有",
"隻",
"船",
"出海",
"。",
"噉樣",
"就"
] | [
"daan6hai6",
"nei5",
"join0",
"keoi5",
"go2di1",
"tour0",
"dou1",
"VQ2",
"dou1",
"mei6bit1",
"jat1ding6",
"baau1",
"jat1ding6",
"tai2",
"dak1",
"dou2",
"VQ1",
"zik1hai6",
"keoi5",
"go2go3",
"giu3zou6",
"m4",
"zi1",
"gun1",
"king4",
"tyun4",
"ding6hai6",
"mat1je5",
"gam2joeng2",
"gaa3",
"VQ1",
"zau6",
"m4",
"jat1ding6",
"baau1",
"nei5",
"tai2",
"dou2",
"lo1",
"VQ1",
"zik1hai6",
"keoi5",
"wui5",
"jau5",
"VQ2",
"jau5",
"zek3",
"syun4",
"ceot1hoi2",
"VQ1",
"gam2joeng2",
"zau6"
] | [
18,
64,
112,
64,
64,
99,
21,
83,
21,
21,
21,
75,
21,
75,
72,
72,
83,
21,
64,
64,
75,
21,
75,
75,
50,
50,
18,
64,
64,
116,
83,
21,
21,
21,
75,
64,
75,
72,
116,
83,
21,
64,
81,
76,
83,
76,
62,
50,
75,
83,
64,
21
] | [
2,
10,
5,
10,
10,
14,
4,
0,
4,
4,
4,
5,
4,
5,
6,
6,
0,
4,
10,
10,
5,
4,
5,
5,
14,
14,
2,
10,
10,
6,
0,
4,
4,
4,
5,
10,
5,
6,
6,
0,
4,
10,
8,
5,
0,
5,
14,
14,
5,
0,
10,
4
] |
"TN001-DR300497-ZIK1H" | "A" | 69 | [
"即係",
"到時",
"自己",
"join",
"嗰啲",
"local",
"tour",
"嚹",
"喎",
",",
"意思",
"係",
"。"
] | [
"zik1hai6",
"dou3si4",
"zi6gei2",
"join0",
"go2di1",
"local0",
"tour0",
"laa3",
"wo3",
"VQ2",
"ji3si1",
"hai6",
"VQ1"
] | [
21,
68,
64,
112,
64,
85,
99,
116,
116,
83,
50,
75,
83
] | [
4,
4,
10,
5,
10,
11,
14,
6,
6,
0,
14,
5,
0
] |
"TN001-DR300497-E6VQ2" | "B" | 70 | [
"誒",
",",
"如果",
"係",
"自己",
"去",
"就",
"係",
"囖",
"。",
"係",
"囖",
",",
"即係",
"佢",
"有",
"啲",
"噉",
"嘅",
"團",
"𡃉",
"直頭",
"。"
] | [
"e6",
"VQ2",
"jyu4gwo2",
"hai6",
"zi6gei2",
"heoi3",
"zau6",
"hai6",
"lo1",
"VQ1",
"hai6",
"lo1",
"VQ2",
"zik1hai6",
"keoi5",
"jau5",
"di1",
"gam2",
"ge3",
"tyun4",
"gaa3",
"zik6tau4",
"VQ1"
] | [
24,
83,
18,
75,
64,
75,
21,
75,
116,
83,
75,
116,
83,
18,
64,
76,
62,
64,
72,
50,
116,
21,
83
] | [
7,
0,
2,
5,
10,
5,
4,
5,
6,
0,
5,
6,
0,
2,
10,
5,
14,
10,
6,
14,
6,
4,
0
] |
"TN001-DR300497-HAI6M" | "A" | 71 | [
"係",
"咩",
"?"
] | [
"hai6",
"me1",
"VQ6"
] | [
75,
116,
83
] | [
5,
6,
0
] |
"TN001-DR300497-HAI6A" | "B" | 72 | [
"係",
"啊",
"。"
] | [
"hai6",
"aa3",
"VQ1"
] | [
75,
116,
83
] | [
5,
6,
0
] |
"TN001-DR300497-ZING6" | "A" | 73 | [
"淨係",
"睇",
"鯨魚",
"呀",
"?",
"有",
"乜嘢",
"睇",
"啊",
"?"
] | [
"zing6hai6",
"tai2",
"king4jyu4",
"aa4",
"VQ6",
"jau5",
"mat1je5",
"tai2",
"aa3",
"VQ6"
] | [
21,
75,
50,
116,
83,
76,
64,
75,
116,
83
] | [
4,
5,
14,
6,
0,
5,
10,
5,
6,
0
] |
"TN001-DR300497-HAI2G" | "B" | 74 | [
"喺",
"個",
"海",
"度",
"。",
"噉",
"係",
"啊",
",",
"係",
"睇",
"海",
"啊",
",",
"睇",
"珊瑚",
"嗰啲",
"嘢",
"囖",
"。"
] | [
"hai2",
"go3",
"hoi2",
"dou6",
"VQ1",
"gam2",
"hai6",
"aa3",
"VQ2",
"hai6",
"tai2",
"hoi2",
"aa3",
"VQ2",
"tai2",
"saan1wu4",
"go2di1",
"je5",
"lo1",
"VQ1"
] | [
60,
62,
50,
64,
83,
18,
75,
116,
83,
75,
75,
50,
116,
83,
75,
50,
64,
50,
116,
83
] | [
3,
14,
14,
10,
0,
2,
5,
6,
0,
5,
5,
14,
6,
0,
5,
14,
10,
14,
6,
0
] |
"TN001-DR300497-O3ZIK" | "A" | 75 | [
"哦",
"即係",
"餵",
"佢",
"食",
"嘢",
"嗰啲",
"嚹",
"喎",
"。"
] | [
"o3",
"zik1hai6",
"wai3",
"keoi5",
"sik6",
"je5",
"go2di1",
"laa3",
"wo3",
"VQ1"
] | [
24,
21,
75,
64,
75,
50,
64,
116,
116,
83
] | [
7,
4,
5,
10,
5,
14,
10,
6,
6,
0
] |
"TN001-DR300497-M4HAI" | "B" | 76 | [
"唔係",
"啊",
"。",
"佢",
"哩個",
"你",
"係",
"○",
"佢",
"嗰",
"隻",
"唔係",
"𡃉",
"佢",
"嗰",
"隻",
"係",
"睇",
"天然",
"嗰啲",
"𡃉",
"喎",
",",
"唔係",
"人哋",
"養",
"𡃉",
"喎",
"。",
"直頭",
"喺",
"個",
"海",
"你",
"係",
"等",
"佢",
"。",
"即係",
"可能",
"嗰個",
"時期",
"係",
"一",
"群",
"噉",
"出來",
"呢",
",",
"所以",
"有",
"時間",
"性",
"囖",
"。",
"你",
"要",
"睇",
"啱",
"邊個",
"時間",
"去",
",",
"先至",
"有得",
"睇",
"𡃉",
"。",
"唔係",
"你",
"要",
"去",
"就",
"有得",
"睇",
"𡃉",
"嚹",
"。"
] | [
"m4hai6",
"aa3",
"VQ1",
"keoi5",
"ni1go3",
"nei5",
"hai6",
"#",
"keoi5",
"go2",
"zek3",
"m4hai6",
"gaa3",
"keoi5",
"go2",
"zek3",
"hai6",
"tai2",
"tin1jin4",
"go2di1",
"gaa3",
"wo3",
"VQ2",
"m4hai6",
"jan4dei6",
"joeng5",
"gaa3",
"wo3",
"VQ1",
"zik6tau4",
"hai2",
"go3",
"hoi2",
"nei5",
"hai6",
"dang2",
"keoi5",
"VQ1",
"zik1hai6",
"ho2nang4",
"go2go3",
"si4kei4",
"hai6",
"jat1",
"kwan4",
"gam2",
"ceot1lai4",
"ne1",
"VQ2",
"so2ji5",
"jau5",
"si4gaan3",
"sing3",
"lo1",
"VQ1",
"nei5",
"jiu3",
"tai2",
"aam1",
"bin1go3",
"si4gaan3",
"heoi3",
"VQ2",
"sin1zi3",
"jau5dak1",
"tai2",
"gaa3",
"VQ1",
"m4hai6",
"nei5",
"jiu3",
"heoi3",
"zau6",
"jau5dak1",
"tai2",
"gaa3",
"laa3",
"VQ1"
] | [
75,
116,
83,
64,
64,
64,
75,
2,
64,
64,
62,
75,
116,
64,
64,
62,
75,
75,
15,
64,
116,
116,
83,
75,
64,
75,
116,
116,
83,
21,
60,
62,
50,
64,
75,
75,
64,
83,
21,
81,
64,
50,
75,
46,
62,
64,
75,
118,
83,
18,
76,
50,
41,
116,
83,
64,
81,
75,
9,
64,
50,
75,
83,
21,
81,
75,
116,
83,
75,
64,
81,
75,
21,
81,
75,
116,
116,
83
] | [
5,
6,
0,
10,
10,
10,
5,
1,
10,
10,
14,
5,
6,
10,
10,
14,
5,
5,
11,
10,
6,
6,
0,
5,
10,
5,
6,
6,
0,
4,
3,
14,
14,
10,
5,
5,
10,
0,
4,
8,
10,
14,
5,
9,
14,
10,
5,
6,
0,
2,
5,
14,
1,
6,
0,
10,
8,
5,
11,
10,
14,
5,
0,
4,
8,
5,
6,
0,
5,
10,
8,
5,
4,
8,
5,
6,
6,
0
] |
"TN001-DR300497-O3VQ2" | "A" | 77 | [
"哦",
",",
"即係",
"睇",
"下",
"你",
"幾時",
"去",
",",
"又",
"撞",
"啱",
"有",
"啲",
"團",
",",
"噉",
"先",
"有得",
"參加",
"囖",
"。",
"係",
"唔係",
"啊",
"?"
] | [
"o3",
"VQ2",
"zik1hai6",
"tai2",
"haa5",
"nei5",
"gei2si4",
"heoi3",
"VQ2",
"jau6",
"zong6",
"aam1",
"jau5",
"di1",
"tyun4",
"VQ2",
"gam2",
"sin1",
"jau5dak1",
"caam1gaa1",
"lo1",
"VQ1",
"hai6",
"m4hai6",
"aa3",
"VQ6"
] | [
24,
83,
18,
75,
72,
64,
64,
75,
83,
21,
75,
9,
76,
62,
50,
83,
18,
21,
81,
75,
116,
83,
75,
75,
116,
83
] | [
7,
0,
2,
5,
6,
10,
10,
5,
0,
4,
5,
11,
5,
14,
14,
0,
2,
4,
8,
5,
6,
0,
5,
5,
6,
0
] |
"TN001-DR300497-DAAN6" | "B" | 78 | [
"但係",
"我",
"唔",
"知",
"邊個",
"月份",
"係",
"睇",
"鯨魚",
"最",
"好",
"囖",
"。"
] | [
"daan6hai6",
"ngo5",
"m4",
"zi1",
"bin1go3",
"jyut6fan6",
"hai6",
"tai2",
"king4jyu4",
"zeoi3",
"hou2",
"lo1",
"VQ1"
] | [
18,
64,
21,
75,
62,
50,
75,
75,
50,
21,
9,
116,
83
] | [
2,
10,
4,
5,
14,
14,
5,
5,
14,
4,
11,
6,
0
] |
"TN001-DR300497-SYUN3" | "A" | 79 | [
"算",
"喇",
"。",
"我",
"對",
"鯨魚",
"都",
"冇乜",
"興趣",
"。"
] | [
"syun3",
"laa1",
"VQ1",
"ngo5",
"deoi3",
"king4jyu4",
"dou1",
"mou5mat1",
"hing3ceoi3",
"VQ1"
] | [
75,
116,
83,
64,
60,
50,
21,
64,
50,
83
] | [
5,
6,
0,
10,
3,
14,
4,
10,
14,
0
] |
"TN001-DR300497-AA3VQ" | "B" | 80 | [
"啊",
",",
"我",
"想",
"睇",
"下",
"喎",
"。"
] | [
"aa3",
"VQ2",
"ngo5",
"soeng2",
"tai2",
"haa5",
"wo3",
"VQ1"
] | [
24,
83,
64,
81,
75,
72,
116,
83
] | [
7,
0,
10,
8,
5,
6,
6,
0
] |
"TN001-DR300497-HAI6M" | "A" | 81 | [
"係",
"咩",
"?"
] | [
"hai6",
"me1",
"VQ6"
] | [
75,
116,
83
] | [
5,
6,
0
] |
"TN001-DR300497-HAI6J" | "B" | 82 | [
"係",
"喲",
"。",
"因為",
"未",
"見",
"過",
"咁",
"大",
"條",
"鯨魚",
"吖",
"嗎",
"。"
] | [
"hai6",
"jo3",
"VQ1",
"jan1wai6",
"mei6",
"gin3",
"gwo3",
"gam3",
"daai6",
"tiu4",
"king4jyu4",
"aa1",
"maa3",
"VQ1"
] | [
75,
116,
83,
18,
21,
75,
72,
21,
9,
62,
50,
116,
116,
83
] | [
5,
6,
0,
2,
4,
5,
6,
4,
11,
14,
14,
6,
6,
0
] |
"TN001-DR300497-NGAAU" | "A" | 83 | [
"咬",
"唔",
"咬",
"人",
"𡃉",
"?"
] | [
"ngaau5",
"m4",
"ngaau5",
"jan4",
"gaa3",
"VQ6"
] | [
75,
21,
75,
50,
116,
83
] | [
5,
4,
5,
14,
6,
0
] |
"TN001-DR300497-GAM2N" | "B" | 84 | [
"噉",
"我",
"諗",
"我",
"諗",
"即係",
"遠",
"觀",
"𡃉",
"吖",
"嗎",
"。"
] | [
"gam2",
"ngo5",
"nam2",
"ngo5",
"nam2",
"zik1hai6",
"jyun5",
"gun1",
"gaa3",
"aa1",
"maa3",
"VQ1"
] | [
18,
64,
75,
64,
75,
21,
9,
75,
116,
116,
116,
83
] | [
2,
10,
5,
10,
5,
4,
11,
5,
6,
6,
6,
0
] |
"TN001-DR300497-JAU5M" | "A" | 85 | [
"有冇",
"危險",
"𡃉",
"?",
"大佬",
"。"
] | [
"jau5mou5",
"ngai4him2",
"gaa3",
"VQ6",
"daai6lou2",
"VQ1"
] | [
76,
50,
116,
83,
50,
83
] | [
5,
14,
6,
0,
14,
0
] |
"TN001-DR300497-GAM2M" | "B" | 86 | [
"噉",
"唔同",
"睇",
"香港",
"嗰啲",
"海洋公園",
"嗰啲",
",",
"都",
"係",
"細",
"吖",
"嗎",
"屬於",
"。"
] | [
"gam2",
"m4tung4",
"tai2",
"hoeng1gong2",
"go2di1",
"hoi2joeng4gung1jyun2",
"go2di1",
"VQ2",
"dou1",
"hai6",
"sai3",
"aa1",
"maa3",
"suk6jyu1",
"VQ1"
] | [
18,
75,
75,
54,
62,
56,
64,
83,
21,
75,
9,
116,
116,
75,
83
] | [
2,
5,
5,
15,
14,
15,
10,
0,
4,
5,
11,
6,
6,
5,
0
] |
"TN001-DR300497-GAM2Z" | "A" | 87 | [
"噉",
"就",
"係",
",",
"吓",
"?",
"海洋公園",
"嗰啲",
"屬於",
"細",
"呀",
"?",
"嗰啲",
"大",
"成",
"點",
"啊",
"?"
] | [
"gam2",
"zau6",
"hai6",
"VQ2",
"haa2",
"VQ6",
"hoi2joeng4gung1jyun2",
"go2di1",
"suk6jyu1",
"sai3",
"aa4",
"VQ6",
"go2di1",
"daai6",
"seng4",
"dim2",
"aa3",
"VQ6"
] | [
18,
21,
75,
83,
24,
83,
56,
64,
75,
9,
116,
83,
64,
9,
72,
64,
116,
83
] | [
2,
4,
5,
0,
7,
0,
15,
10,
5,
11,
6,
0,
10,
11,
6,
10,
6,
0
] |
"TN001-DR300497-M4ZI1" | "B" | 88 | [
"唔",
"知",
"喲",
"。",
"有時",
"睇",
"即係",
"睇",
"佢",
"嗰啲",
"catalog",
"嗰啲",
"圖片",
"呢",
",",
"嗰",
"條",
"尾",
"都",
"好",
"大",
"條",
"𡃉",
"喎",
"。"
] | [
"m4",
"zi1",
"jo3",
"VQ1",
"jau5si4",
"tai2",
"zik1hai6",
"tai2",
"keoi5",
"go2di1",
"catalog0",
"go2di1",
"tou4pin2",
"ne1",
"VQ2",
"go2",
"tiu4",
"mei5",
"dou1",
"hou2",
"daai6",
"tiu4",
"gaa3",
"wo3",
"VQ1"
] | [
21,
75,
116,
83,
21,
75,
21,
75,
64,
64,
99,
64,
50,
118,
83,
64,
62,
50,
21,
21,
9,
62,
116,
116,
83
] | [
4,
5,
6,
0,
4,
5,
4,
5,
10,
10,
14,
10,
14,
6,
0,
10,
14,
14,
4,
4,
11,
14,
6,
6,
0
] |
"TN001-DR300497-TIU4M" | "A" | 89 | [
"條",
"尾",
"都",
"發",
"死",
"人",
"喇",
",",
"係",
"唔係",
"啊",
"?"
] | [
"tiu4",
"mei5",
"dou1",
"faat3",
"sei2",
"jan4",
"laa1",
"VQ2",
"hai6",
"m4hai6",
"aa3",
"VQ6"
] | [
62,
50,
21,
75,
75,
50,
116,
83,
75,
75,
116,
83
] | [
14,
14,
4,
5,
5,
14,
6,
0,
5,
5,
6,
0
] |
"TN001-DR300497-ZAU6H" | "B" | 90 | [
"就",
"係",
"想",
"去",
"睇",
"下",
"啲",
"咁",
",",
"即係",
"天然",
"嘅",
"嘢",
"囖",
"。"
] | [
"zau6",
"hai6",
"soeng2",
"heoi3",
"tai2",
"haa5",
"di1",
"gam3",
"VQ2",
"zik1hai6",
"tin1jin4",
"ge3",
"je5",
"lo1",
"VQ1"
] | [
21,
75,
81,
75,
75,
72,
62,
21,
83,
21,
15,
72,
50,
116,
83
] | [
4,
5,
8,
5,
5,
6,
14,
4,
0,
4,
11,
6,
14,
6,
0
] |
"TN001-DR300497-DAAN6" | "A" | 91 | [
"但",
"會",
"唔",
"會",
"好",
"貴",
"啊",
"睇",
"哩啲",
"?",
"貴",
"唔",
"貴",
"?"
] | [
"daan6",
"wui5",
"m4",
"wui5",
"hou2",
"gwai3",
"aa3",
"tai2",
"ni1di1",
"VQ6",
"gwai3",
"m4",
"gwai3",
"VQ6"
] | [
18,
81,
21,
81,
21,
9,
116,
75,
64,
83,
9,
21,
9,
83
] | [
2,
8,
4,
8,
4,
11,
6,
5,
10,
0,
11,
4,
11,
0
] |
"TN001-DR300497-GAM2M" | "B" | 92 | [
"噉",
"唔",
"知",
"啊",
"。",
"價錢",
"唔",
"知",
"啊",
"。",
"睇",
"下",
"到時",
"去",
"先至",
"-",
"唔係",
"喇",
",",
"去",
"先至",
"check",
"價錢",
"啊",
"嗰啲",
"嘢",
"囖",
"。"
] | [
"gam2",
"m4",
"zi1",
"aa3",
"VQ1",
"gaa3cin4",
"m4",
"zi1",
"aa3",
"VQ1",
"tai2",
"haa5",
"dou3si4",
"heoi3",
"sin1zi3",
"VQ2",
"m4hai6",
"laa1",
"VQ2",
"heoi3",
"sin1zi3",
"check0",
"gaa3cin4",
"aa3",
"go2di1",
"je5",
"lo1",
"VQ1"
] | [
18,
21,
75,
116,
83,
50,
21,
75,
116,
83,
75,
72,
68,
75,
21,
83,
75,
116,
83,
75,
21,
112,
50,
116,
64,
50,
116,
83
] | [
2,
4,
5,
6,
0,
14,
4,
5,
6,
0,
5,
6,
4,
5,
4,
0,
5,
6,
0,
5,
4,
5,
14,
6,
10,
14,
6,
0
] |
"TN001-DR300497-BAT1G" | "A" | 93 | [
"不過",
"唔緊要",
"喇",
"。",
"噠",
"你",
"老公",
"個",
"朵",
"。",
"幾時",
"度",
"有得",
"睇",
"唧",
"噉",
"哩啲",
",",
"知",
"唔",
"知",
"大概",
"?"
] | [
"bat1gwo3",
"m4gan2jiu3",
"laa1",
"VQ1",
"daat3",
"nei5",
"lou5gung1",
"go3",
"do2",
"VQ1",
"gei2si4",
"dou2",
"jau5dak1",
"tai2",
"zek1",
"gam2",
"ni1di1",
"VQ2",
"zi1",
"m4",
"zi1",
"daai6koi3",
"VQ6"
] | [
18,
43,
116,
83,
75,
64,
50,
62,
50,
83,
64,
46,
81,
75,
116,
64,
64,
83,
75,
21,
75,
21,
83
] | [
2,
1,
6,
0,
5,
10,
14,
14,
14,
0,
10,
9,
8,
5,
6,
10,
10,
0,
5,
4,
5,
4,
0
] |
"TN001-DR300497-NGO5M" | "B" | 94 | [
"我",
"唔",
"知",
"𡃉",
"。",
"我",
"唔",
"知",
"個",
"月份",
"係",
"幾時",
"啊",
"。",
"但",
"我",
"知",
"有",
"哩啲",
"嘢",
"睇",
"。"
] | [
"ngo5",
"m4",
"zi1",
"gaa3",
"VQ1",
"ngo5",
"m4",
"zi1",
"go3",
"jyut6fan6",
"hai6",
"gei2si4",
"aa3",
"VQ1",
"daan6",
"ngo5",
"zi1",
"jau5",
"ni1di1",
"je5",
"tai2",
"VQ1"
] | [
64,
21,
75,
116,
83,
64,
21,
75,
62,
50,
75,
64,
116,
83,
18,
64,
75,
76,
64,
50,
75,
83
] | [
10,
4,
5,
6,
0,
10,
4,
5,
14,
14,
5,
10,
6,
0,
2,
10,
5,
5,
10,
14,
5,
0
] |
"TN001-DR300497-HAI6M" | "A" | 95 | [
"係",
"咩",
"?",
"我",
"又",
"唔係",
"好",
"知",
"喎",
"。"
] | [
"hai6",
"me1",
"VQ6",
"ngo5",
"jau6",
"m4hai6",
"hou2",
"zi1",
"wo3",
"VQ1"
] | [
75,
116,
83,
64,
21,
75,
21,
75,
116,
83
] | [
5,
6,
0,
10,
4,
5,
4,
5,
6,
0
] |
"TN001-DR300497-DAAN6" | "B" | 96 | [
"但係",
"睇",
"好似",
"如果",
"照",
"哩啲",
"報紙",
"噉",
"影",
"出來",
","
] | [
"daan6hai6",
"tai2",
"hou2ci5",
"jyu4gwo2",
"ziu3",
"ni1di1",
"bou3zi2",
"gam2",
"jing2",
"ceot1lai4",
"VQ2"
] | [
18,
75,
75,
18,
75,
64,
50,
64,
75,
75,
83
] | [
2,
5,
5,
2,
5,
10,
14,
10,
5,
5,
0
] |
"TN001-DR300497-JING1" | "B" | 97 | [
"應該",
"係",
"夏天",
"噉",
"嘅",
"氣候",
",",
"係",
"喇",
"。"
] | [
"jing1goi1",
"hai6",
"haa6tin1",
"gam2",
"ge3",
"hei3hau6",
"VQ2",
"hai6",
"laa1",
"VQ1"
] | [
81,
75,
68,
64,
72,
50,
83,
75,
116,
83
] | [
8,
5,
4,
10,
6,
14,
0,
5,
6,
0
] |
"TN001-DR300497-GWAI3" | "A" | 98 | [
"季節",
"。"
] | [
"gwai3zit3",
"VQ1"
] | [
50,
83
] | [
14,
0
] |
"TN001-DR300497-DOU1J" | "A" | 99 | [
"都",
"應該",
"係",
"喇",
"。",
"你",
"喺",
"個",
"海",
"嗰度",
"你",
"梗係",
"着",
"泳褲",
"喇",
",",
"唔係",
"着",
"乜嘢",
"啊",
"。"
] | [
"dou1",
"jing1goi1",
"hai6",
"laa1",
"VQ1",
"nei5",
"hai2",
"go3",
"hoi2",
"go2dou6",
"nei5",
"gang2hai6",
"zoek3",
"wing6fu3",
"laa1",
"VQ2",
"m4hai6",
"zoek3",
"mat1je5",
"aa3",
"VQ1"
] | [
21,
81,
75,
116,
83,
64,
60,
62,
50,
64,
64,
21,
75,
50,
116,
83,
75,
75,
64,
116,
83
] | [
4,
8,
5,
6,
0,
10,
3,
14,
14,
10,
10,
4,
5,
14,
6,
0,
5,
5,
10,
6,
0
] |
Dataset Card for The Hong Kong Cantonese Corpus (HKCanCor)
Dataset Summary
The Hong Kong Cantonese Corpus (HKCanCor) comprises transcribed conversations recorded between March 1997 and August 1998. It contains recordings of spontaneous speech (51 texts) and radio programmes (42 texts), which involve 2 to 4 speakers, with 1 text of monologue.
In total, the corpus contains around 230,000 Chinese words. The text is word-segmented (i.e., tokenization is at word-level, and each token can span multiple Chinese characters). Tokens are annotated with part-of-speech (POS) tags and romanised Cantonese pronunciation.
- Romanisation
- Follows conventions set by the Linguistic Society of Hong Kong (LSHK).
- POS
- The tagset used by this corpus extends the one in the Peita-Fujitsu-Renmin Ribao (PRF) corpus (Duan et al., 2000). Extensions were made to further capture Cantonese-specific phenomena.
- To facilitate everyday usage and for better comparability across languages and/or corpora, this dataset also includes the tags mapped to the Universal Dependencies 2.0 format. This mapping references the PyCantonese library.
Supported Tasks and Leaderboards
[More Information Needed]
Languages
Yue Chinese / Cantonese (Hong Kong).
Dataset Structure
This corpus has 10801 utterances and approximately 230000 Chinese words. There is no predefined split.
Data Instances
Each instance contains a conversation id, speaker id within that conversation, turn number, part-of-speech tag for each Chinese word in the PRF format and UD2.0 format, and the utterance written in Chinese characters as well as its LSHK format romanisation.
For example:
{
'conversation_id': 'TNR016-DR070398-HAI6V',
'pos_tags_prf': ['v', 'w'],
'pos_tags_ud': ['VERB', 'PUNCT'],
'speaker': 'B',
'transcriptions': ['hai6', 'VQ1'],
'turn_number': 112,
'tokens': ['係', '。']
}
Data Fields
- conversation_id: unique dialogue-level id
- pos_tags_prf: POS tag using the PRF format at token-level
- pos_tags_ud: POS tag using the UD2.0 format at token-level
- speaker: unique speaker id within dialogue
- transcriptions: token-level romanisation in the LSHK format
- turn_number: turn number in dialogue
- tokens: Chinese word or punctuation at token-level
Data Splits
There are no specified splits in this dataset.
Dataset Creation
Curation Rationale
[More Information Needed]
Source Data
Initial Data Collection and Normalization
[More Information Needed]
Who are the source language producers?
[More Information Needed]
Annotations
Annotation process
[More Information Needed]
Who are the annotators?
[More Information Needed]
Personal and Sensitive Information
[More Information Needed]
Considerations for Using the Data
Social Impact of Dataset
[More Information Needed]
Discussion of Biases
[More Information Needed]
Other Known Limitations
[More Information Needed]
Additional Information
Dataset Curators
[More Information Needed]
Licensing Information
This work is licensed under a Creative Commons Attribution 4.0 International License.
Citation Information
This corpus was developed by Luke and Wong, 2015.
@article{luke2015hong,
author={Luke, Kang-Kwong and Wong, May LY},
title={The Hong Kong Cantonese corpus: design and uses},
journal={Journal of Chinese Linguistics},
year={2015},
pages={309-330},
month={12}
}
The POS tagset to Universal Dependencies tagset mapping is provided by Jackson Lee, as a part of the PyCantonese library.
@misc{lee2020,
author = {Lee, Jackson},
title = {PyCantonese: Cantonese Linguistics and NLP in Python},
year = {2020},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/jacksonllee/pycantonese}},
commit = {1d58f44e1cb097faa69de6b617e1d28903b84b98}
}
Contributions
Thanks to @j-chim for adding this dataset.
- Downloads last month
- 476