python/tests/: use local import, create __init__.py
[aubio.git] / python / tests / eval_pitch
1 #! /usr/bin/env python
2
3 """
4 Script to evaluate pitch algorithms against TONAS database.
5
6 See http://mtg.upf.edu/download/datasets/tonas/
7
8 Example run:
9
10     $ ./eval_pitch /path/to/TONAS/*/*.wav
11     OK:  94.74% vx r:  96.87% vx f:  15.83% f0:  96.02% %12:   0.50% /path/to/TONAS/Deblas/01-D_AMairena.wav
12     OK:  89.89% vx r:  93.21% vx f:  13.81% f0:  90.74% %12:   1.51% /path/to/TONAS/Deblas/02-D_ChanoLobato.wav
13     OK:  96.02% vx r:  96.73% vx f:  10.91% f0:  96.42% %12:   0.00% /path/to/TONAS/Deblas/03-D_Chocolate.wav
14     [...]
15     OK:  82.35% vx r:  95.52% vx f:  67.09% f0:  89.80% %12:   0.95% /path/to/TONAS/Martinetes2/80-M2_Rancapinos.wav
16     OK:  61.97% vx r:  85.71% vx f:  22.03% f0:  55.63% %12:   8.57% /path/to/TONAS/Martinetes2/81-M2_SDonday.wav
17     OK:  75.26% vx r:  91.63% vx f:  27.27% f0:  75.99% %12:   5.05% /path/to/TONAS/Martinetes2/82-M2_TiaAnicalaPiriniaca.wav
18     OK:  82.77% vx r:  92.74% vx f:  38.27% f0:  87.33% %12:   1.67% 69 files, total_length: 1177.69s, total runtime: 25.91s
19
20
21 """
22
23 import sys
24 import time
25 import os.path
26 import numpy
27 from .utils import array_from_text_file, array_from_yaml_file
28 from aubio import source, pitch, freqtomidi
29
# Reference timestamp; the summary at the end of the script reports the
# wall-clock time elapsed since this point.
start = time.time()

# Tolerance applied to differences between MIDI note numbers further down
# (both truth and estimate go through freqtomidi), i.e. +/- half a semitone.
freq_tol = .50

# Pitch detection methods that can be evaluated; `method` selects one of them.
methods = ["default", "yinfft", "mcomb", "yin", "fcomb", "schmitt", "specacf"]
method = methods[0]

# Generic settings, overridden below for some methods.
downsample = 1
tolerance = 0.35
silence = -40.
# number of leading analysis results dropped to align with the ground truth
skip = 1
if method in ["yinfft", "default"]:
    downsample = 1
    tolerance = 0.45
elif method == "mcomb":
    downsample = 4
elif method == "yin":
    downsample = 4
    tolerance = 0.2

# Floor division keeps these integral whatever division semantics are active
# (plain `/` yields floats under true division); they are used as sample
# counts and as an integer sampling rate.
samplerate = 44100 // downsample
hop_s = 512 // downsample
win_s = 2048 // downsample
53
def get_pitches(filename, samplerate=samplerate, win_s=win_s, hop_s=hop_s):
    """Run the configured pitch detector over one audio file.

    The module-level `method`, `tolerance` and `silence` settings configure
    the detector, which is asked to report values in the "freq" unit.

    Args:
        filename: path of the audio file handed to `source`.
        samplerate: rate requested from `source`; the rate the source object
            actually reports is the one used afterwards.
        win_s: window size passed to `pitch`.
        hop_s: hop size, i.e. number of frames requested per read.

    Returns:
        A numpy array with one `[time, pitch]` row per block read, where
        `time` is the number of frames read before that block divided by
        the sampling rate.
    """
    reader = source(filename, samplerate, hop_s)
    # from here on, trust the rate reported by the source object
    samplerate = reader.samplerate

    detector = pitch(method, win_s, hop_s, samplerate)
    detector.set_unit("freq")
    detector.set_tolerance(tolerance)
    detector.set_silence(silence)

    # one [timestamp, pitch] row per block
    rows = []
    # number of frames consumed so far
    frames_seen = 0
    # start with a "full" read so that the loop body runs at least once
    read = hop_s
    while read >= hop_s:
        samples, read = reader()
        timestamp = frames_seen / float(samplerate)
        rows.append([timestamp, detector(samples)[0]])
        frames_seen += read
        # a short read marks the last block: it is still analysed and
        # recorded above, then the loop condition ends the iteration
    return numpy.array(rows)
75
def _percent(part, whole):
    """Return `part` as a percentage of `whole`, or NaN when `whole` is 0."""
    if whole == 0:
        # undefined ratio (e.g. a file without voiced frames): report NaN
        # instead of raising ZeroDivisionError
        return float("nan")
    return 100. * part / whole


def _chroma_distance(interval):
    """Return the distance from `interval` to the nearest multiple of 12."""
    remainder = interval % 12.
    # fold the remainder so that 11.7 and 12.3 are both 0.3 away from an octave
    return min(remainder, 12. - remainder)


def _classify_frames(truth_pitches, exper_pitches):
    """Count frame outcomes for paired truth / estimated MIDI pitches.

    A pitch equal to 0 is treated as "unvoiced". Returns the tuple
    (correct_sil, fp, missed, correct_f0, correct_chroma, incorrect).
    """
    correct_sil, fp, missed, correct_f0, correct_chroma, incorrect = 0, 0, 0, 0, 0, 0
    for a, b in zip(truth_pitches, exper_pitches):
        if a == 0 and b == 0:
            correct_sil += 1
        elif a == 0 and b != 0:
            fp += 1
        elif a != 0 and b == 0:
            missed += 1
        elif abs(b - a) < freq_tol:
            correct_f0 += 1
        elif _chroma_distance(abs(b - a)) < freq_tol:
            # right pitch class, wrong octave (above or below)
            correct_chroma += 1
        else:
            incorrect += 1
    return correct_sil, fp, missed, correct_f0, correct_chroma, incorrect


def _report_line(correct_f0, correct_sil, correct_chroma, missed, fp, voiced, total):
    """Format the five percentage fields shared by per-file and summary lines."""
    unvoiced = correct_sil + fp
    fields = [
        "OK: %6s%%" % ("%.2f" % _percent(correct_f0 + correct_sil, total)),
        "vx r: %6s%%" % ("%.2f" % (100. - _percent(missed, voiced))),
        "vx f: %6s%%" % ("%.2f" % _percent(fp, unvoiced)),
        "f0: %6s%%" % ("%.2f" % _percent(correct_f0, voiced)),
        "%%12: %6s%%" % ("%.2f" % _percent(correct_chroma, voiced)),
    ]
    return " ".join(fields)


# Counters accumulated over all evaluated files.
total_correct_f0, total_correct_sil, total_missed, total_incorrect, total_fp, total_total = 0, 0, 0, 0, 0, 0
total_correct_chroma, total_voiced = 0, 0

for source_file in sys.argv[1:]:
    # the annotation file is expected next to the audio file
    ground_truth_file = source_file.replace('.wav', '.f0.Corrected')
    if not os.path.isfile(ground_truth_file):
        sys.stdout.write("ERR could not find ground_truth_file %s\n" % ground_truth_file)
        continue

    # keep columns 0 and 2 of the annotation: the first is compared with the
    # analysis timestamps below, the second is converted with freqtomidi
    ground_truth = array_from_text_file(ground_truth_file)[:,[0,2]]
    experiment = get_pitches(source_file)
    # check that we have the same length, more or less one frame
    assert abs(len(ground_truth) - len(experiment)) < 2
    # align experiment by skipping first results
    experiment = experiment[skip:]
    experiment[:,0] -= experiment[0,0]
    # trim to shortest list
    maxlen = min(len(ground_truth), len(experiment))
    experiment = experiment[:maxlen]
    ground_truth = ground_truth[:maxlen]
    # get difference matrix
    diffmat = abs(experiment - ground_truth)
    # make sure we got the timing right
    assert max(diffmat[:,0]) < 10e-4, source_file
    truth_pitches = freqtomidi(ground_truth[:,1])
    exper_pitches = freqtomidi(experiment[:,1])

    total = len(truth_pitches)
    unvoiced = len(truth_pitches[truth_pitches == 0])
    voiced = total - unvoiced
    correct_sil, fp, missed, correct_f0, correct_chroma, incorrect = \
        _classify_frames(truth_pitches, exper_pitches)
    # sanity checks: every frame falls in exactly one category
    assert correct_sil + fp + missed + correct_f0 + correct_chroma + incorrect == total
    assert unvoiced == correct_sil + fp
    assert voiced == missed + correct_f0 + correct_chroma + incorrect

    report = _report_line(correct_f0, correct_sil, correct_chroma, missed, fp, voiced, total)
    sys.stdout.write("%s %s\n" % (report, source_file))

    total_correct_sil += correct_sil
    total_correct_f0 += correct_f0
    total_correct_chroma += correct_chroma
    total_missed += missed
    total_incorrect += incorrect
    total_fp += fp
    total_voiced += voiced
    total_total += total

# Summary over all files: same fields as the per-file lines, followed by the
# number of files given on the command line, the analysed duration and the
# elapsed wall-clock time.
summary = [
    _report_line(total_correct_f0, total_correct_sil, total_correct_chroma,
                 total_missed, total_fp, total_voiced, total_total),
    "%d files," % len(sys.argv[1:]),
    "total_length: %.2fs," % ((total_total * hop_s) / float(samplerate)),
    "total runtime: %.2fs" % (time.time() - start),
]
sys.stdout.write(" ".join(summary) + "\n")