2 # -*- coding: utf-8 -*-
4 """aubio command line tool
6 This file was written by Paul Brossier <piem@aubio.org> and is released under
9 Note: this script is mostly about parsing command line arguments. For more
10 readable code examples, check out the `python/demos` folder."""
18 epilog = 'use "%(prog)s <command> --help" for more info about each command'
19 parser = argparse.ArgumentParser(epilog=epilog)
20 parser.add_argument('-V', '--version', help="show version",
21 action="store_true", dest="show_version")
23 subparsers = parser.add_subparsers(title='commands', dest='command',
24 parser_class= AubioArgumentParser,
27 parser_add_subcommand_help(subparsers)
29 parser_add_subcommand_onset(subparsers)
30 parser_add_subcommand_pitch(subparsers)
31 parser_add_subcommand_beat(subparsers)
32 parser_add_subcommand_tempo(subparsers)
33 parser_add_subcommand_notes(subparsers)
34 parser_add_subcommand_mfcc(subparsers)
35 parser_add_subcommand_melbands(subparsers)
36 parser_add_subcommand_quiet(subparsers)
37 parser_add_subcommand_cut(subparsers)
def parser_add_subcommand_help(subparsers):
    """Register the global ``help`` subcommand on *subparsers*.

    The sub-parser is created with ``ArgumentDefaultsHelpFormatter`` as its
    formatter class; the created parser itself is not returned.
    """
    settings = {
        'help': 'show help message',
        'formatter_class': argparse.ArgumentDefaultsHelpFormatter,
    }
    subparsers.add_parser('help', **settings)
47 def parser_add_subcommand_onset(subparsers):
49 subparser = subparsers.add_parser('onset',
50 help='estimate time of onsets (beginning of sound event)',
51 formatter_class = argparse.ArgumentDefaultsHelpFormatter)
53 subparser.add_buf_hop_size()
54 helpstr = "onset novelty function"
55 helpstr += " <default|energy|hfc|complex|phase|specdiff|kl|mkl|specflux>"
56 subparser.add_method(helpstr=helpstr)
57 subparser.add_threshold()
58 subparser.add_silence()
59 subparser.add_minioi()
60 subparser.add_time_format()
61 subparser.add_verbose_help()
62 subparser.set_defaults(process=process_onset)
64 def parser_add_subcommand_pitch(subparsers):
66 subparser = subparsers.add_parser('pitch',
67 help='estimate fundamental frequency (monophonic)')
69 subparser.add_buf_hop_size(buf_size=2048)
70 helpstr = "pitch detection method <default|yinfft|yin|mcomb|fcomb|schmitt>"
71 subparser.add_method(helpstr=helpstr)
72 subparser.add_threshold()
73 subparser.add_pitch_unit()
74 subparser.add_silence()
75 subparser.add_time_format()
76 subparser.add_verbose_help()
77 subparser.set_defaults(process=process_pitch)
79 def parser_add_subcommand_beat(subparsers):
81 subparser = subparsers.add_parser('beat',
82 help='estimate location of beats')
84 subparser.add_buf_hop_size(buf_size=1024, hop_size=512)
85 subparser.add_time_format()
86 subparser.add_verbose_help()
87 subparser.set_defaults(process=process_beat)
89 def parser_add_subcommand_tempo(subparsers):
91 subparser = subparsers.add_parser('tempo',
92 help='estimate overall tempo in bpm')
94 subparser.add_buf_hop_size(buf_size=1024, hop_size=512)
95 subparser.add_time_format()
96 subparser.add_verbose_help()
97 subparser.set_defaults(process=process_tempo)
99 def parser_add_subcommand_notes(subparsers):
101 subparser = subparsers.add_parser('notes',
102 help='estimate midi-like notes (monophonic)')
103 subparser.add_input()
104 subparser.add_buf_hop_size()
105 subparser.add_silence()
106 subparser.add_release_drop()
107 subparser.add_time_format()
108 subparser.add_verbose_help()
109 subparser.set_defaults(process=process_notes)
111 def parser_add_subcommand_mfcc(subparsers):
113 subparser = subparsers.add_parser('mfcc',
114 help='extract Mel-Frequency Cepstrum Coefficients')
115 subparser.add_input()
116 subparser.add_buf_hop_size()
117 subparser.add_time_format()
118 subparser.add_verbose_help()
119 subparser.set_defaults(process=process_mfcc)
def parser_add_subcommand_melbands(subparsers):
    """Register the ``melbands`` subcommand on *subparsers*.

    The sub-parser receives the input, buffer/hop size, time format and
    verbosity options, and ``process_melbands`` is stored as its
    ``process`` default.
    """
    melbands_parser = subparsers.add_parser(
        'melbands',
        help='extract energies in Mel-frequency bands')
    melbands_parser.add_input()
    melbands_parser.add_buf_hop_size()
    melbands_parser.add_time_format()
    melbands_parser.add_verbose_help()
    melbands_parser.set_defaults(process=process_melbands)
131 def parser_add_subcommand_quiet(subparsers):
133 subparser = subparsers.add_parser('quiet',
134 help='extract timestamps of quiet and loud regions')
135 subparser.add_input()
136 subparser.add_hop_size()
137 subparser.add_silence()
138 subparser.add_time_format()
139 subparser.add_verbose_help()
140 subparser.set_defaults(process=process_quiet)
142 def parser_add_subcommand_cut(subparsers):
144 subparser = subparsers.add_parser('cut',
145 help='slice at timestamps')
146 subparser.add_input()
147 helpstr = "onset novelty function"
148 helpstr += " <default|energy|hfc|complex|phase|specdiff|kl|mkl|specflux>"
149 subparser.add_method(helpstr=helpstr)
150 subparser.add_buf_hop_size()
151 subparser.add_silence()
152 subparser.add_threshold(default=0.3)
153 subparser.add_minioi()
154 subparser.add_slicer_options()
155 subparser.add_time_format()
156 subparser.add_verbose_help()
157 subparser.set_defaults(process=process_cut)
159 class AubioArgumentParser(argparse.ArgumentParser):
162 self.add_argument("source_uri", default=None, nargs='?',
163 help="input sound file to analyse", metavar = "<source_uri>")
164 self.add_argument("-i", "--input", dest = "source_uri2",
165 help="input sound file to analyse", metavar = "<source_uri>")
166 self.add_argument("-r", "--samplerate",
167 metavar = "<freq>", type=int,
168 action="store", dest="samplerate", default=0,
169 help="samplerate at which the file should be represented")
171 def add_verbose_help(self):
172 self.add_argument("-v", "--verbose",
173 action="count", dest="verbose", default=1,
174 help="make lots of noise [default]")
175 self.add_argument("-q", "--quiet",
176 action="store_const", dest="verbose", const=0,
def add_buf_hop_size(self, buf_size=512, hop_size=256):
    """Register both the buffer size and the hop size options.

    Delegates to ``add_buf_size`` and ``add_hop_size``, forwarding
    *buf_size* and *hop_size* as the default values of the two options.
    """
    # buffer size first, then hop size
    self.add_buf_size(buf_size=buf_size)
    self.add_hop_size(hop_size=hop_size)
def add_buf_size(self, buf_size=512):
    """Register the ``-B/--bufsize`` integer option.

    The parsed value is stored as ``buf_size``; *buf_size* is used as the
    default and is shown in the help text.
    """
    description = "buffer size [default=%d]" % buf_size
    self.add_argument(
        "-B", "--bufsize",
        dest="buf_size", action="store",
        type=int, default=buf_size,
        metavar="<size>",
        help=description)
def add_hop_size(self, hop_size=256):
    """Register the ``-H/--hopsize`` integer option.

    The parsed value is stored as ``hop_size``; *hop_size* is used as the
    default and is shown in the help text.
    """
    description = "overlap size [default=%d]" % hop_size
    self.add_argument(
        "-H", "--hopsize",
        dest="hop_size", action="store",
        type=int, default=hop_size,
        metavar="<size>",
        help=description)
def add_method(self, method='default', helpstr='method'):
    """Register the ``-m/--method`` string option.

    The parsed value is stored as ``method``. *helpstr* is the leading part
    of the help text, to which the default *method* is appended.
    """
    description = "%s [default=%s]" % (helpstr, method)
    self.add_argument(
        "-m", "--method",
        dest="method", action="store",
        type=str, default=method,
        metavar="<method>",
        help=description)
def add_threshold(self, default=None):
    """Register the ``-t/--threshold`` float option.

    The parsed value is stored as ``threshold``. *default* is both the
    option's default value and the value printed in the help text, so a
    ``None`` default is shown as ``[default=None]``.
    """
    description = "threshold [default=%s]" % default
    self.add_argument(
        "-t", "--threshold",
        dest="threshold", action="store",
        type=float, default=default,
        metavar="<threshold>",
        help=description)
def add_silence(self):
    """Register the ``-s/--silence`` float option.

    The parsed value is stored as ``silence`` and defaults to ``-70``.
    """
    self.add_argument(
        "-s", "--silence",
        dest="silence", action="store",
        type=float, default=-70,
        metavar="<value>",
        help="silence threshold")
def add_release_drop(self):
    """Register the ``-d/--release-drop`` float option.

    The parsed value is stored as ``release_drop`` and defaults to ``10``.
    """
    self.add_argument(
        "-d", "--release-drop",
        dest="release_drop", action="store",
        type=float, default=10,
        metavar="<value>",
        help="release drop threshold")
def add_minioi(self, default="12ms"):
    """Register the ``-M/--minioi`` string option.

    The parsed value is stored, unconverted, as ``minioi``; *default* is
    used as the default value and is shown in the help text.
    """
    description = "minimum Inter-Onset Interval [default=%s]" % default
    self.add_argument(
        "-M", "--minioi",
        dest="minioi", action="store",
        type=str, default=default,
        metavar="<value>",
        help=description)
225 def add_pitch_unit(self, default="Hz"):
226 help_str = "frequency unit, should be one of Hz, midi, bin, cent"
227 help_str += " [default=%s]" % default
228 self.add_argument("-u", "--pitch-unit",
229 metavar = "<value>", type=str,
230 action="store", dest="pitch_unit", default=default,
233 def add_time_format(self):
234 helpstr = "select time values output format (samples, ms, seconds)"
235 helpstr += " [default=seconds]"
236 self.add_argument("-T", "--time-format",
242 def add_slicer_options(self):
243 self.add_argument("-o", "--output", type = str,
244 metavar = "<outputdir>",
245 action="store", dest="output_directory", default=None,
246 help="specify path where slices of the original file should"
248 self.add_argument("--cut-until-nsamples", type = int,
249 metavar = "<samples>",
250 action = "store", dest = "cut_until_nsamples", default = None,
251 help="how many extra samples should be added at the end of"
253 self.add_argument("--cut-every-nslices", type = int,
254 metavar = "<samples>",
255 action = "store", dest = "cut_every_nslices", default = None,
256 help="how many slices should be groupped together at each cut")
257 self.add_argument("--cut-until-nslices", type = int,
258 metavar = "<slices>",
259 action = "store", dest = "cut_until_nslices", default = None,
260 help="how many extra slices should be added at the end of"
262 self.add_argument("--create-first",
263 action = "store_true", dest = "create_first", default = False,
264 help="always include first slice")
def samples2seconds(n_frames, samplerate):
    """Format *n_frames* as seconds at *samplerate*, followed by a tab."""
    seconds = n_frames / float(samplerate)
    return "%f\t" % seconds
def samples2milliseconds(n_frames, samplerate):
    """Format *n_frames* as milliseconds at *samplerate*, followed by a tab."""
    milliseconds = 1000. * n_frames / float(samplerate)
    return "%f\t" % milliseconds
def samples2samples(n_frames, _samplerate):
    """Format *n_frames* as an integer sample count, followed by a tab.

    The samplerate argument is accepted so the signature matches the other
    time formatters, but it is not used.
    """
    template = "%d\t"
    return template % n_frames
278 if mode is None or mode == 'seconds' or mode == 's':
279 return samples2seconds
280 elif mode == 'ms' or mode == 'milliseconds':
281 return samples2milliseconds
282 elif mode == 'samples':
283 return samples2samples
285 raise ValueError("invalid time format '%s'" % mode)
287 # definition of processing classes
class default_process(object):
    """Base class shared by the per-subcommand processing classes.

    It provides the option filtering helpers, a no-op ``flush`` hook and
    the common end of construction (time formatter selection and verbose
    reporting of the options in use).
    """

    def __init__(self, args):
        """Finish construction from the parsed command line *args*.

        When *args* carries a ``time_format`` entry, ``self.time2string`` is
        set to the formatter returned by ``timefunc``. When ``args.verbose``
        is above 2 and ``self.options`` already exists, a one-line summary
        of these options is written to stderr.
        """
        if 'time_format' in args:
            self.time2string = timefunc(args.time_format)
        if args.verbose > 2 and hasattr(self, 'options'):
            # the command name is the second '_'-separated part of the
            # class name
            command = type(self).__name__.split('_')[1]
            words = ['running', command, 'with options',
                     repr(self.options), '\n']
            sys.stderr.write(' '.join(words))

    def flush(self, frames_read, samplerate):
        """Hook optionally called at the end of process; does nothing here."""
        pass

    def parse_options(self, args, valid_opts):
        """Store in ``self.options`` the entries of *args* named in *valid_opts*."""
        found = vars(args)
        self.options = dict(
            (key, found[key]) for key in found if key in valid_opts)

    def remap_pvoc_options(self, options):
        """Rename ``buf_size``/``hop_size`` to ``win_s``/``hop_s`` in *options*.

        The dictionary is modified in place and then stored as
        ``self.options``.
        """
        # FIXME: this renaming would not be needed if
        # python/ext/py-phasevoc.c understood buf_size/hop_size
        renames = (('buf_size', 'win_s'), ('hop_size', 'hop_s'))
        for old_key, new_key in renames:
            if old_key in options:
                options[new_key] = options.pop(old_key)
        self.options = options
318 class process_onset(default_process):
319 valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']
320 def __init__(self, args):
321 self.parse_options(args, self.valid_opts)
322 self.onset = aubio.onset(**self.options)
323 if args.threshold is not None:
324 self.onset.set_threshold(args.threshold)
326 if args.minioi.endswith('ms'):
327 self.onset.set_minioi_ms(float(args.minioi[:-2]))
328 elif args.minioi.endswith('s'):
329 self.onset.set_minioi_s(float(args.minioi[:-1]))
331 self.onset.set_minioi(int(args.minioi))
333 self.onset.set_silence(args.silence)
334 super(process_onset, self).__init__(args)
335 def __call__(self, block):
336 return self.onset(block)
337 def repr_res(self, res, _frames_read, samplerate):
339 outstr = self.time2string(self.onset.get_last(), samplerate)
340 sys.stdout.write(outstr + '\n')
class process_pitch(default_process):
    """Processor for the ``pitch`` subcommand, built around ``aubio.pitch``."""

    valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']

    def __init__(self, args):
        """Create the pitch object from *args* and apply optional settings.

        The unit, the tolerance (taken from ``args.threshold``) and the
        silence level are only forwarded when they are not ``None``.
        """
        self.parse_options(args, self.valid_opts)
        self.pitch = aubio.pitch(**self.options)
        optional_settings = (
            (args.pitch_unit, self.pitch.set_unit),
            (args.threshold, self.pitch.set_tolerance),
            (args.silence, self.pitch.set_silence),
        )
        for value, apply_setting in optional_settings:
            if value is not None:
                apply_setting(value)
        super(process_pitch, self).__init__(args)

    def __call__(self, block):
        """Run the pitch object on *block* and return its result."""
        return self.pitch(block)

    def repr_res(self, res, frames_read, samplerate):
        """Write the current time and ``res[0]`` (6 decimals) to stdout."""
        timestamp = self.time2string(frames_read, samplerate)
        value = "%.6f\n" % res[0]
        sys.stdout.write(timestamp + value)
360 class process_beat(default_process):
361 valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']
362 def __init__(self, args):
363 self.parse_options(args, self.valid_opts)
364 self.tempo = aubio.tempo(**self.options)
365 super(process_beat, self).__init__(args)
366 def __call__(self, block):
367 return self.tempo(block)
368 def repr_res(self, res, _frames_read, samplerate):
370 outstr = self.time2string(self.tempo.get_last(), samplerate)
371 sys.stdout.write(outstr + '\n')
373 class process_tempo(process_beat):
374 def __init__(self, args):
375 super(process_tempo, self).__init__(args)
376 self.beat_locations = []
377 def repr_res(self, res, _frames_read, samplerate):
379 self.beat_locations.append(self.tempo.get_last_s())
380 def flush(self, frames_read, samplerate):
382 if len(self.beat_locations) < 2:
383 outstr = "unknown bpm"
385 bpms = 60. / np.diff(self.beat_locations)
386 median_bpm = np.mean(bpms)
387 if len(self.beat_locations) < 10:
388 outstr = "%.2f bpm (uncertain)" % median_bpm
390 outstr = "%.2f bpm" % median_bpm
391 sys.stdout.write(outstr + '\n')
393 class process_notes(default_process):
394 valid_opts = ['method', 'hop_size', 'buf_size', 'samplerate']
395 def __init__(self, args):
396 self.parse_options(args, self.valid_opts)
397 self.notes = aubio.notes(**self.options)
398 if args.silence is not None:
399 self.notes.set_silence(args.silence)
400 if args.release_drop is not None:
401 self.notes.set_release_drop(args.release_drop)
402 super(process_notes, self).__init__(args)
403 def __call__(self, block):
404 return self.notes(block)
405 def repr_res(self, res, frames_read, samplerate):
406 if res[2] != 0: # note off
407 fmt_out = self.time2string(frames_read, samplerate)
408 sys.stdout.write(fmt_out + '\n')
409 if res[0] != 0: # note on
411 fmt_out = "%f\t" % lastmidi
412 fmt_out += self.time2string(frames_read, samplerate)
413 sys.stdout.write(fmt_out) # + '\t')
414 def flush(self, frames_read, samplerate):
415 eof = self.time2string(frames_read, samplerate)
416 sys.stdout.write(eof + '\n')
418 class process_mfcc(default_process):
419 def __init__(self, args):
420 valid_opts1 = ['hop_size', 'buf_size']
421 self.parse_options(args, valid_opts1)
422 self.remap_pvoc_options(self.options)
423 self.pv = aubio.pvoc(**self.options)
425 valid_opts2 = ['buf_size', 'n_filters', 'n_coeffs', 'samplerate']
426 self.parse_options(args, valid_opts2)
427 self.mfcc = aubio.mfcc(**self.options)
429 # remember all options
430 self.parse_options(args, list(set(valid_opts1 + valid_opts2)))
432 super(process_mfcc, self).__init__(args)
434 def __call__(self, block):
435 fftgrain = self.pv(block)
436 return self.mfcc(fftgrain)
437 def repr_res(self, res, frames_read, samplerate):
438 fmt_out = self.time2string(frames_read, samplerate)
439 fmt_out += ' '.join(["% 9.7f" % f for f in res.tolist()])
440 sys.stdout.write(fmt_out + '\n')
442 class process_melbands(default_process):
443 def __init__(self, args):
445 valid_opts = ['hop_size', 'buf_size']
446 self.parse_options(args, valid_opts)
447 self.remap_pvoc_options(self.options)
448 self.pv = aubio.pvoc(**self.options)
450 valid_opts = ['buf_size', 'n_filters']
451 self.parse_options(args, valid_opts)
452 self.remap_pvoc_options(self.options)
453 self.filterbank = aubio.filterbank(**self.options)
454 self.filterbank.set_mel_coeffs_slaney(args.samplerate)
456 super(process_melbands, self).__init__(args)
457 def __call__(self, block):
458 fftgrain = self.pv(block)
459 return self.filterbank(fftgrain)
460 def repr_res(self, res, frames_read, samplerate):
461 fmt_out = self.time2string(frames_read, samplerate)
462 fmt_out += ' '.join(["% 9.7f" % f for f in res.tolist()])
463 sys.stdout.write(fmt_out + '\n')
465 class process_quiet(default_process):
466 def __init__(self, args):
468 valid_opts = ['hop_size', 'silence']
469 self.parse_options(args, valid_opts)
472 if args.silence is not None:
473 self.silence = args.silence
474 super(process_quiet, self).__init__(args)
476 def __call__(self, block):
477 if aubio.silence_detection(block, self.silence) == 1:
478 if self.wassilence != 1:
480 return 2 # newly found silence
481 return 1 # silence again
483 if self.wassilence != 0:
485 return -1 # newly found noise
486 return 0 # noise again
488 def repr_res(self, res, frames_read, samplerate):
494 if fmt_out is not None:
495 fmt_out += self.time2string(frames_read, samplerate)
496 sys.stdout.write(fmt_out + '\n')
498 class process_cut(process_onset):
499 def __init__(self, args):
500 super(process_cut, self).__init__(args)
504 def __call__(self, block):
505 ret = super(process_cut, self).__call__(block)
507 self.slices.append(self.onset.get_last())
510 def flush(self, frames_read, samplerate):
511 _cut_slice(self.options, self.slices)
512 duration = float(frames_read) / float(samplerate)
513 base_info = '%(source_file)s' % \
514 {'source_file': self.options.source_uri}
515 base_info += ' (total %(duration).2fs at %(samplerate)dHz)\n' % \
516 {'duration': duration, 'samplerate': samplerate}
517 info = "created %d slices from " % len(self.slices)
519 sys.stderr.write(info)
521 def _cut_slice(options, timestamps):
523 nstamps = len(timestamps)
525 # generate output files
526 timestamps_end = None
527 if options.cut_every_nslices:
528 timestamps = timestamps[::options.cut_every_nslices]
529 nstamps = len(timestamps)
530 if options.cut_until_nslices and options.cut_until_nsamples:
531 msg = "using cut_until_nslices, but cut_until_nsamples is set"
533 if options.cut_until_nsamples:
534 lag = options.cut_until_nsamples
535 timestamps_end = [t + lag for t in timestamps[1:]]
536 timestamps_end += [1e120]
537 if options.cut_until_nslices:
538 slice_lag = options.cut_until_nslices
539 timestamps_end = [t for t in timestamps[1 + slice_lag:]]
540 timestamps_end += [1e120] * (options.cut_until_nslices + 1)
541 aubio.slice_source_at_stamps(options.source_uri,
542 timestamps, timestamps_end = timestamps_end,
543 output_dir = options.output_directory,
544 samplerate = options.samplerate,
545 create_first = options.create_first)
548 parser = aubio_parser()
549 if sys.version_info[0] != 3:
550 # on py2, create a dummy ArgumentParser to workaround the
551 # optional subcommand issue. See https://bugs.python.org/issue9253
553 # - version string is shown when only '-V' is passed
554 # - help is printed if '-V' is passed with any other argument
555 # - any other argument get forwarded to the real parser
556 parser_root = argparse.ArgumentParser(add_help=False)
557 parser_root.add_argument('-V', '--version', help="show version",
558 action="store_true", dest="show_version")
559 args, extras = parser_root.parse_known_args()
560 if not args.show_version: # no -V, forward to parser
561 args = parser.parse_args(extras, namespace=args)
562 elif len(extras) != 0: # -V with other arguments, print help
565 else: # in py3, we can simply use parser directly
566 args = parser.parse_args()
567 if 'show_version' in args and args.show_version:
568 sys.stdout.write('aubio version ' + aubio.version + '\n')
570 elif 'verbose' in args and args.verbose > 3:
571 sys.stderr.write('aubio version ' + aubio.version + '\n')
572 if 'command' not in args or args.command is None \
573 or args.command in ['help']:
574 # no command given, print help and return 1
576 if args.command and args.command in ['help']:
580 elif not args.source_uri and not args.source_uri2:
581 sys.stderr.write("Error: a source is required\n")
584 elif args.source_uri2 is not None:
585 args.source_uri = args.source_uri2
588 with aubio.source(args.source_uri, hop_size=args.hop_size,
589 samplerate=args.samplerate) as a_source:
590 # always update args.samplerate to native samplerate, in case
591 # source was opened with args.samplerate=0
592 args.samplerate = a_source.samplerate
593 # create the processor for this subcommand
594 processor = args.process(args)
597 # read new block from source
598 block, read = a_source()
599 # execute processor on this block
600 res = processor(block)
601 # print results for this block
603 processor.repr_res(res, frames_read, a_source.samplerate)
604 # increment total number of frames read
606 # exit loop at end of file
607 if read < a_source.hop_size:
609 # flush the processor if needed
610 processor.flush(frames_read, a_source.samplerate)
612 fmt_string = "read {:.2f}s"
613 fmt_string += " ({:d} samples in {:d} blocks of {:d})"
614 fmt_string += " from {:s} at {:d}Hz\n"
615 sys.stderr.write(fmt_string.format(
616 frames_read / float(a_source.samplerate),
618 frames_read // a_source.hop_size + 1,
621 a_source.samplerate))
622 except KeyboardInterrupt: