2 # -*- coding: utf-8 -*-
4 """aubio command line tool
6 This file was written by Paul Brossier <piem@aubio.org> and is released under
9 Note: this script is mostly about parsing command line arguments. For more
10 readable code examples, check out the `python/demos` folder."""
def aubio_parser():
    """Build and return the top-level argument parser of the aubio tool.

    One sub-parser is registered per subcommand (onset, pitch, beat, tempo,
    notes, mfcc, melbands). Each of them stores the class that implements the
    subcommand in the ``process`` attribute of the parsed arguments.
    """
    epilog = 'use "%(prog)s <command> --help" for more info about each command'
    parser = argparse.ArgumentParser(epilog=epilog)
    parser.add_argument('-V', '--version', help="show version",
            action="store_true", dest="show_version")

    # NOTE(review): only the title and dest arguments of this call were
    # available when this function was restored; confirm nothing else is
    # expected here.
    subparsers = parser.add_subparsers(title='commands', dest='command')

    # onset subcommand
    subparser = subparsers.add_parser('onset',
            help='estimate time of onsets (beginning of sound event)',
            formatter_class = argparse.ArgumentDefaultsHelpFormatter)
    parser_add_input(subparser)
    parser_add_buf_hop_size(subparser)
    helpstr = "onset novelty function"
    helpstr += " <default|energy|hfc|complex|phase|specdiff|kl|mkl|specflux>"
    parser_add_method(subparser, helpstr=helpstr)
    parser_add_threshold(subparser)
    parser_add_silence(subparser)
    parser_add_minioi(subparser)
    parser_add_time_format(subparser)
    parser_add_verbose_help(subparser)
    subparser.set_defaults(process=process_onset)

    # pitch subcommand
    subparser = subparsers.add_parser('pitch',
            help='estimate fundamental frequency (monophonic)')
    parser_add_input(subparser)
    parser_add_buf_hop_size(subparser, buf_size=2048)
    helpstr = "pitch detection method <default|yinfft|yin|mcomb|fcomb|schmitt>"
    parser_add_method(subparser, helpstr=helpstr)
    parser_add_threshold(subparser)
    parser_add_pitch_unit(subparser)
    parser_add_silence(subparser)
    parser_add_time_format(subparser)
    parser_add_verbose_help(subparser)
    subparser.set_defaults(process=process_pitch)

    # beat subcommand
    subparser = subparsers.add_parser('beat',
            help='estimate location of beats')
    parser_add_input(subparser)
    parser_add_buf_hop_size(subparser, buf_size=1024, hop_size=512)
    parser_add_time_format(subparser)
    parser_add_verbose_help(subparser)
    subparser.set_defaults(process=process_beat)

    # tempo subcommand
    subparser = subparsers.add_parser('tempo',
            help='estimate overall tempo in bpm')
    parser_add_input(subparser)
    parser_add_buf_hop_size(subparser, buf_size=1024, hop_size=512)
    parser_add_time_format(subparser)
    parser_add_verbose_help(subparser)
    subparser.set_defaults(process=process_tempo)

    # notes subcommand
    subparser = subparsers.add_parser('notes',
            help='estimate midi-like notes (monophonic)')
    parser_add_input(subparser)
    parser_add_buf_hop_size(subparser)
    parser_add_time_format(subparser)
    parser_add_verbose_help(subparser)
    subparser.set_defaults(process=process_notes)

    # mfcc subcommand
    subparser = subparsers.add_parser('mfcc',
            help='extract Mel-Frequency Cepstrum Coefficients')
    parser_add_input(subparser)
    parser_add_buf_hop_size(subparser)
    parser_add_time_format(subparser)
    parser_add_verbose_help(subparser)
    subparser.set_defaults(process=process_mfcc)

    # melbands subcommand
    subparser = subparsers.add_parser('melbands',
            help='extract energies in Mel-frequency bands')
    parser_add_input(subparser)
    parser_add_buf_hop_size(subparser)
    parser_add_time_format(subparser)
    parser_add_verbose_help(subparser)
    subparser.set_defaults(process=process_melbands)

    return parser
def parser_add_input(parser):
    """Register the input source options on ``parser``.

    Adds an optional positional ``source_uri``, the ``-i/--input`` option
    (stored as ``source_uri2``) and the integer ``-r/--samplerate`` option,
    which defaults to 0.
    """
    parser.add_argument(
        "source_uri", default=None, nargs='?',
        help="input sound file to analyse", metavar="<source_uri>")
    parser.add_argument(
        "-i", "--input", dest="source_uri2",
        help="input sound file to analyse", metavar="<source_uri>")
    parser.add_argument(
        "-r", "--samplerate",
        metavar="<freq>", type=int,
        action="store", dest="samplerate", default=0,
        help="samplerate at which the file should be represented")
def parser_add_verbose_help(parser):
    """Register the ``-v/--verbose`` and ``-q/--quiet`` options on ``parser``.

    Both options write to ``verbose``: it starts at 1, each ``-v`` adds one,
    and ``-q`` sets it to 0.
    """
    parser.add_argument(
        "-v", "--verbose",
        action="count", dest="verbose", default=1,
        help="make lots of noise [default]")
    parser.add_argument(
        "-q", "--quiet",
        action="store_const", dest="verbose", const=0,
        help="be quiet")
def parser_add_buf_hop_size(parser, buf_size=512, hop_size=256):
    """Register the ``-B/--bufsize`` and ``-H/--hopsize`` options on ``parser``.

    ``buf_size`` and ``hop_size`` are the defaults of the two integer options
    and are also shown in their help texts.
    """
    parser.add_argument(
        "-B", "--bufsize",
        action="store", dest="buf_size", default=buf_size,
        metavar="<size>", type=int,
        help="buffer size [default=%d]" % buf_size)
    parser.add_argument(
        "-H", "--hopsize",
        metavar="<size>", type=int,
        action="store", dest="hop_size", default=hop_size,
        help="overlap size [default=%d]" % hop_size)
def parser_add_method(parser, method='default', helpstr='method'):
    """Register the ``-m/--method`` option on ``parser``.

    ``method`` is the default value; ``helpstr`` is the description shown in
    the help text, to which the default is appended.
    """
    parser.add_argument(
        "-m", "--method",
        metavar="<method>", type=str,
        action="store", dest="method", default=method,
        help="%s [default=%s]" % (helpstr, method))
def parser_add_threshold(parser, default=None):
    """Register the float ``-t/--threshold`` option on ``parser``.

    ``default`` is used when the option is not given and is shown in the
    help text.
    """
    parser.add_argument(
        "-t", "--threshold",
        metavar="<threshold>", type=float,
        action="store", dest="threshold", default=default,
        help="threshold [default=%s]" % default)
def parser_add_silence(parser):
    """Register the float ``-s/--silence`` option (default -70) on ``parser``."""
    parser.add_argument(
        "-s", "--silence",
        metavar="<value>", type=float,
        action="store", dest="silence", default=-70,
        help="silence threshold")
def parser_add_minioi(parser):
    """Register the ``-M/--minioi`` option on ``parser``.

    The value is kept as a string (default ``"12ms"``); any unit suffix is
    interpreted by the code that consumes it.
    """
    parser.add_argument(
        "-M", "--minioi",
        metavar="<value>", type=str,
        action="store", dest="minioi", default="12ms",
        help="minimum Inter-Onset Interval")
def parser_add_pitch_unit(parser, default="Hz"):
    """Register the ``-u/--pitch-unit`` option on ``parser``.

    ``default`` is the unit used when the option is not given; it is also
    appended to the help text.
    """
    help_str = "frequency unit, should be one of Hz, midi, bin, cent"
    help_str += " [default=%s]" % default
    parser.add_argument(
        "-u", "--pitch-unit",
        metavar="<value>", type=str,
        action="store", dest="pitch_unit", default=default,
        help=help_str)
def parser_add_time_format(parser):
    """Register the ``-T/--time-format`` option on ``parser``.

    The value is stored as ``time_format`` and defaults to ``None``, which
    the time formatting code treats like ``seconds``.
    """
    helpstr = "select time values output format (samples, ms, seconds)"
    helpstr += " [default=seconds]"
    # NOTE(review): only the option flags of this call were available when it
    # was restored; dest/default follow how ``time_format`` is consumed, the
    # metavar follows the sibling options. Confirm against the intended CLI.
    parser.add_argument(
        "-T", "--time-format",
        metavar="<format>",
        dest="time_format", default=None,
        help=helpstr)
def samples2seconds(n_frames, samplerate):
    """Format ``n_frames`` as seconds at ``samplerate``, followed by a tab."""
    return "%f\t" % (n_frames / float(samplerate))
def samples2milliseconds(n_frames, samplerate):
    """Format ``n_frames`` as milliseconds at ``samplerate``, followed by a tab."""
    return "%f\t" % (1000. * n_frames / float(samplerate))
def samples2samples(n_frames, samplerate):
    """Format ``n_frames`` as an integer sample count, followed by a tab.

    ``samplerate`` is unused; it is accepted so that all time formatters
    share the same signature.
    """
    return "%d\t" % n_frames
def timefunc(mode):
    """Return the time formatting function selected by ``mode``.

    ``None``, ``'seconds'`` and ``'s'`` select ``samples2seconds``; ``'ms'``
    and ``'milliseconds'`` select ``samples2milliseconds``; ``'samples'``
    selects ``samples2samples``.

    Raises:
        ValueError: if ``mode`` is not one of the values above.
    """
    if mode is None or mode == 'seconds' or mode == 's':
        return samples2seconds
    if mode == 'ms' or mode == 'milliseconds':
        return samples2milliseconds
    if mode == 'samples':
        return samples2samples
    raise ValueError("invalid time format '%s'" % mode)
192 # definition of processing classes
class default_process(object):
    """Base class of the per-subcommand processors.

    Subclasses fill ``self.options`` (see ``parse_options``) before calling
    this constructor, so that the options can be reported in verbose mode.
    """

    def __init__(self, args):
        # only subcommands that registered a time format option carry it
        if 'time_format' in args:
            self.time2string = timefunc(args.time_format)
        if args.verbose > 2 and hasattr(self, 'options'):
            # class names follow the ``process_<name>`` pattern
            name = type(self).__name__.split('_')[1]
            optstr = ' '.join(['running', name, 'with options', repr(self.options), '\n'])
            sys.stderr.write(optstr)

    def flush(self, n_frames, samplerate):
        """Hook optionally called at the end of process; a no-op here."""
        pass

    def parse_options(self, args, valid_opts):
        """Store in ``self.options`` the entries of ``args`` listed in ``valid_opts``."""
        # get any valid options found in a dictionary of arguments
        self.options = {k: v for k, v in vars(args).items() if k in valid_opts}

    def remap_pvoc_options(self, options):
        """Rename ``buf_size``/``hop_size`` to ``win_s``/``hop_s`` in ``options``.

        ``options`` is modified in place and stored as ``self.options``.
        """
        # FIXME: we need to remap buf_size to win_s, hop_size to hop_s
        # adjust python/ext/py-phasevoc.c to understand buf_size/hop_size
        for old_key, new_key in (('buf_size', 'win_s'), ('hop_size', 'hop_s')):
            if old_key in options:
                options[new_key] = options.pop(old_key)
        self.options = options
class process_onset(default_process):
    """Processor of the ``onset`` subcommand: prints the time of each onset."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.onset = aubio.onset(**self.options)
        if args.threshold is not None:
            self.onset.set_threshold(args.threshold)
        if args.minioi:
            # the suffix selects the unit: 'ms', then 's', else a plain
            # integer handed to set_minioi; 'ms' must be tested first since
            # it also ends with 's'
            if args.minioi.endswith('ms'):
                self.onset.set_minioi_ms(float(args.minioi[:-2]))
            elif args.minioi.endswith('s'):
                self.onset.set_minioi_s(float(args.minioi[:-1]))
            else:
                self.onset.set_minioi(int(args.minioi))
        if args.silence is not None:
            self.onset.set_silence(args.silence)
        super(process_onset, self).__init__(args)

    def __call__(self, block):
        return self.onset(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write the time of the last onset to stdout when one was found."""
        # NOTE(review): guard restored by analogy with the note-on test used
        # for notes; presumably a non-zero first element flags a detection.
        # Confirm against the aubio.onset return value.
        if res[0] != 0:
            outstr = self.time2string(self.onset.get_last(), samplerate)
            sys.stdout.write(outstr + '\n')
class process_pitch(default_process):
    """Processor of the ``pitch`` subcommand: prints one value per block."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.pitch = aubio.pitch(**self.options)
        # only forward the settings that were actually provided
        if args.pitch_unit is not None:
            self.pitch.set_unit(args.pitch_unit)
        if args.threshold is not None:
            self.pitch.set_tolerance(args.threshold)
        if args.silence is not None:
            self.pitch.set_silence(args.silence)
        super(process_pitch, self).__init__(args)

    def __call__(self, block):
        return self.pitch(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write the current time and the first element of ``res`` to stdout."""
        fmt_out = self.time2string(frames_read, samplerate)
        sys.stdout.write(fmt_out + "%.6f\n" % res[0])
class process_beat(default_process):
    """Processor of the ``beat`` subcommand: prints the time of each beat."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.tempo = aubio.tempo(**self.options)
        super(process_beat, self).__init__(args)

    def __call__(self, block):
        return self.tempo(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write the time of the last beat to stdout when one was found."""
        # NOTE(review): guard restored by analogy with the note-on test used
        # for notes; presumably a non-zero first element flags a detection.
        # Confirm against the aubio.tempo return value.
        if res[0] != 0:
            outstr = self.time2string(self.tempo.get_last(), samplerate)
            sys.stdout.write(outstr + '\n')
class process_tempo(process_beat):
    """Processor of the ``tempo`` subcommand: prints one overall bpm value."""

    def __init__(self, args):
        super(process_tempo, self).__init__(args)
        self.beat_locations = []

    def repr_res(self, res, frames_read, samplerate):
        """Record the time of the last beat instead of printing it."""
        # NOTE(review): guard restored by analogy with the note-on test used
        # for notes; confirm against the aubio.tempo return value.
        if res[0] != 0:
            self.beat_locations.append(self.tempo.get_last_s())

    def flush(self, frames_read, samplerate):
        """Write the bpm estimated from the recorded beats to stdout."""
        # imported here so that only this subcommand needs numpy
        import numpy as np
        if len(self.beat_locations) < 2:
            # at least two beats are needed to measure an interval
            outstr = "unknown bpm"
        else:
            bpms = 60./ np.diff(self.beat_locations)
            # arithmetic mean of the per-interval tempi
            mean_bpm = np.mean(bpms)
            if len(self.beat_locations) < 10:
                outstr = "%.2f bpm (uncertain)" % mean_bpm
            else:
                outstr = "%.2f bpm" % mean_bpm
        sys.stdout.write(outstr + '\n')
class process_notes(default_process):
    """Processor of the ``notes`` subcommand.

    Each output line is started when a note begins and completed when it
    ends (or, at the latest, by ``flush``).
    """

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        self.parse_options(args, self.valid_opts)
        self.notes = aubio.notes(**self.options)
        super(process_notes, self).__init__(args)

    def __call__(self, block):
        return self.notes(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write note-off and note-on events found in ``res`` to stdout."""
        if res[2] != 0: # note off
            # terminates the line opened by the matching note on
            fmt_out = self.time2string(frames_read, samplerate)
            sys.stdout.write(fmt_out + '\n')
        if res[0] != 0: # note on
            # no newline: the end time is appended by the next note off
            fmt_out = "%f\t" % res[0]
            fmt_out += self.time2string(frames_read, samplerate)
            sys.stdout.write(fmt_out) # + '\t')

    def flush(self, frames_read, samplerate):
        """Write the final time followed by a newline to stdout."""
        eof = self.time2string(frames_read, samplerate)
        sys.stdout.write(eof + '\n')
class process_mfcc(default_process):
    """Processor of the ``mfcc`` subcommand: prints one coefficient row per block."""

    def __init__(self, args):
        # the phase vocoder takes win_s/hop_s, hence the remapping
        pvoc_opts = ['hop_size', 'buf_size']
        self.parse_options(args, pvoc_opts)
        self.remap_pvoc_options(self.options)
        self.pv = aubio.pvoc(**self.options)

        mfcc_opts = ['buf_size', 'n_filters', 'n_coeffs', 'samplerate']
        self.parse_options(args, mfcc_opts)
        self.mfcc = aubio.mfcc(**self.options)

        # remember all options
        self.parse_options(args, list(set(pvoc_opts + mfcc_opts)))

        super(process_mfcc, self).__init__(args)

    def __call__(self, block):
        fftgrain = self.pv(block)
        return self.mfcc(fftgrain)

    def repr_res(self, res, frames_read, samplerate):
        """Write the current time and the values of ``res`` to stdout."""
        fmt_out = self.time2string(frames_read, samplerate)
        fmt_out += ' '.join(["% 9.7f" % f for f in res.tolist()])
        sys.stdout.write(fmt_out + '\n')
class process_melbands(default_process):
    """Processor of the ``melbands`` subcommand: prints one band row per block."""

    def __init__(self, args):
        # both objects take win_s rather than buf_size, hence the remapping
        valid_opts = ['hop_size', 'buf_size']
        self.parse_options(args, valid_opts)
        self.remap_pvoc_options(self.options)
        self.pv = aubio.pvoc(**self.options)

        valid_opts = ['buf_size', 'n_filters']
        self.parse_options(args, valid_opts)
        self.remap_pvoc_options(self.options)
        self.filterbank = aubio.filterbank(**self.options)
        self.filterbank.set_mel_coeffs_slaney(args.samplerate)

        super(process_melbands, self).__init__(args)

    def __call__(self, block):
        fftgrain = self.pv(block)
        return self.filterbank(fftgrain)

    def repr_res(self, res, frames_read, samplerate):
        """Write the current time and the values of ``res`` to stdout."""
        fmt_out = self.time2string(frames_read, samplerate)
        fmt_out += ' '.join(["% 9.7f" % f for f in res.tolist()])
        sys.stdout.write(fmt_out + '\n')
366 parser = aubio_parser()
367 args = parser.parse_args()
368 if 'show_version' in args and args.show_version:
369 sys.stdout.write('aubio version ' + aubio.version + '\n')
371 elif 'verbose' in args and args.verbose > 3:
372 sys.stderr.write('aubio version ' + aubio.version + '\n')
373 if 'command' not in args or args.command is None:
374 # no command given, print help and return 1
377 elif not args.source_uri and not args.source_uri2:
378 sys.stderr.write("Error: a source is required\n")
381 elif args.source_uri2 is not None:
382 args.source_uri = args.source_uri2
385 with aubio.source(args.source_uri, hop_size=args.hop_size,
386 samplerate=args.samplerate) as a_source:
387 # always update args.samplerate to native samplerate, in case
388 # source was opened with args.samplerate=0
389 args.samplerate = a_source.samplerate
390 # create the processor for this subcommand
391 processor = args.process(args)
394 # read new block from source
395 block, read = a_source()
396 # execute processor on this block
397 res = processor(block)
398 # print results for this block
400 processor.repr_res(res, frames_read, a_source.samplerate)
401 # increment total number of frames read
403 # exit loop at end of file
404 if read < a_source.hop_size: break
405 # flush the processor if needed
406 processor.flush(frames_read, a_source.samplerate)
408 fmt_string = "read {:.2f}s"
409 fmt_string += " ({:d} samples in {:d} blocks of {:d})"
410 fmt_string += " from {:s} at {:d}Hz\n"
411 sys.stderr.write(fmt_string.format(
412 frames_read/float(a_source.samplerate),
414 frames_read // a_source.hop_size + 1,
417 a_source.samplerate))
418 except KeyboardInterrupt: