Read and write ark/scp files from Kaldi
In this section, we demonstrate:
- how to generate ark/scp files that can be read by Kaldi
- how to read ark/scp files from Kaldi
Generate ark/scp files
We provide kaldifst.VectorFstWriter
to save FSTs to ark/scp files.
The following is an example:
#!/usr/bin/env python3
import kaldifst


def generate_acceptor():
    """Compile a small acceptor from text, keeping its input symbol table.

    Returns:
      The FST returned by ``kaldifst.compile`` for a 3-state acceptor whose
      arcs are labelled ``a`` and ``b``.
    """
    # One arc per line: "src dst label weight"; the last line is the final
    # state followed by its weight.
    s = """
    0 1 a 0.5
    1 2 b 0.8
    2 0.25
    """
    isym = kaldifst.SymbolTable.from_str(
        """
        a 1
        b 2
        """
    )
    fsa = kaldifst.compile(
        s,
        acceptor=True,
        isymbols=isym,
        keep_isymbols=True,
    )
    return fsa


def generate_transducer():
    """Compile a small transducer from text, keeping both symbol tables.

    Returns:
      The FST returned by ``kaldifst.compile`` for a 3-state transducer that
      maps ``a`` to ``A`` and ``b`` to ``B``.
    """
    # One arc per line: "src dst ilabel olabel weight"; the last line is the
    # final state followed by its weight.
    s = """
    0 1 a A 0.5
    1 2 b B 0.8
    2 0.25
    """
    isym = kaldifst.SymbolTable.from_str(
        """
        a 1
        b 2
        """
    )

    osym = kaldifst.SymbolTable.from_str(
        """
        A 1
        B 2
        """
    )
    fst = kaldifst.compile(
        s,
        acceptor=False,
        isymbols=isym,
        osymbols=osym,
        keep_isymbols=True,
        keep_osymbols=True,
    )
    return fst


def test_writer():
    """Write the acceptor and the transducer to ``lat.ark`` / ``lat.scp``."""
    fsa = generate_acceptor()
    fst = generate_transducer()

    # Plain string: there is nothing to interpolate, so no f-prefix is needed.
    wspecifier = "ark,scp:lat.ark,lat.scp"
    # The context manager closes the writer when the block exits.
    with kaldifst.VectorFstWriter(wspecifier) as ko:
        ko.write("lat1", fsa)
        ko.write("lat2", fst)


def main():
    """Run the writer example."""
    test_writer()


if __name__ == "__main__":
    main()
The above code creates two files, lat.ark and lat.scp:
$ ls -lh lat.scp lat.ark
-rw-r--r-- 1 kuangfangjun root 452 Oct 8 23:18 lat.ark
-rw-r--r-- 1 kuangfangjun root 32 Oct 8 23:18 lat.scp
You can use fstcopy from Kaldi to view them:
$ fstcopy scp:lat.scp ark,t:-
fstcopy scp:lat.scp ark,t:-
lat1
0 1 a 1 0.5
1 2 b 2 0.8
2 0.25
lat2
0 1 a A 0.5
1 2 b B 0.8
2 0.25
LOG (fstcopy[5.5.1035~1-3dd90]:main():fstcopy.cc:71) Copied 2 FSTs.
Read ark/scp files sequentially
We provide kaldifst.SequentialVectorFstReader
to read FSTs from ark/scp files
sequentially. The following is an example:
#!/usr/bin/env python3
import kaldifst


def test_sequential_reader():
    """Iterate over every (key, FST) pair listed in ``lat.scp`` and print it."""
    # Plain string: there is nothing to interpolate, so no f-prefix is needed.
    rspecifier = "scp:lat.scp"
    with kaldifst.SequentialVectorFstReader(rspecifier) as ki:
        for key, value in ki:
            assert isinstance(value, kaldifst.StdVectorFst)
            print("key", key)
            print("value\n", str(value))


def main():
    """Run the sequential reader example."""
    test_sequential_reader()


if __name__ == "__main__":
    main()
The above code produces the following output:
key lat1
value
0 1 a 1 0.5
1 2 b 2 0.8
2 0.25
key lat2
value
0 1 a A 0.5
1 2 b B 0.8
2 0.25
Read ark/scp files randomly
We provide kaldifst.RandomAccessVectorFstReader
to read FSTs from ark/scp files
by key, in any order (random access). The following is an example:
#!/usr/bin/env python3
import kaldifst


def test_random_access_reader():
    """Look up the FSTs stored under ``lat1`` and ``lat2`` by key and print them."""
    # Plain string: there is nothing to interpolate, so no f-prefix is needed.
    rspecifier = "scp:lat.scp"
    with kaldifst.RandomAccessVectorFstReader(rspecifier) as ki:
        # Membership can be tested before indexing by key.
        assert "lat1" in ki
        assert "lat2" in ki
        lat1 = ki["lat1"]
        print("lat1", lat1)

        lat2 = ki["lat2"]
        print("lat2", lat2)


def main():
    """Run the random-access reader example."""
    test_random_access_reader()


if __name__ == "__main__":
    main()
The above code produces the following output:
lat1 0 1 a 1 0.5
1 2 b 2 0.8
2 0.25
lat2 0 1 a A 0.5
1 2 b B 0.8
2 0.25