summaryrefslogtreecommitdiffhomepage
path: root/libs/auditok
diff options
context:
space:
mode:
authorLouis Vézina <[email protected]>2020-06-10 12:04:54 -0400
committerLouis Vézina <[email protected]>2020-06-10 12:04:54 -0400
commitc6548c06b7bb769af656d1eb18cc12e108260990 (patch)
treec99c6bf789f9c94d0776215ef205dc26564f310d /libs/auditok
parentf79faaa5c53306a37ee47f3c1725268c855a8f3d (diff)
downloadbazarr-c6548c06b7bb769af656d1eb18cc12e108260990.tar.gz
bazarr-c6548c06b7bb769af656d1eb18cc12e108260990.zip
Subsync first implementation (only after download/upload).
Diffstat (limited to 'libs/auditok')
-rw-r--r--libs/auditok/__init__.py19
-rw-r--r--libs/auditok/cmdline.py794
-rw-r--r--libs/auditok/core.py437
-rw-r--r--libs/auditok/data/1to6arabic_16000_mono_bc_noise.wavbin0 -> 601256 bytes
-rw-r--r--libs/auditok/data/was_der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_silence.wavbin0 -> 1493036 bytes
-rw-r--r--libs/auditok/dataset.py18
-rw-r--r--libs/auditok/exceptions.py3
-rw-r--r--libs/auditok/io.py517
-rw-r--r--libs/auditok/util.py843
9 files changed, 2631 insertions, 0 deletions
diff --git a/libs/auditok/__init__.py b/libs/auditok/__init__.py
new file mode 100644
index 000000000..4534c7c9c
--- /dev/null
+++ b/libs/auditok/__init__.py
@@ -0,0 +1,19 @@
+"""
+:author:
+
+Amine SEHILI <[email protected]>
+2015-2018
+
+:License:
+
+This package is published under GNU GPL Version 3.
+"""
+
+from __future__ import absolute_import
+from .core import *
+from .io import *
+from .util import *
+from . import dataset
+from .exceptions import *
+
+__version__ = "0.1.8"
diff --git a/libs/auditok/cmdline.py b/libs/auditok/cmdline.py
new file mode 100644
index 000000000..5878b0ccc
--- /dev/null
+++ b/libs/auditok/cmdline.py
@@ -0,0 +1,794 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+auditok.auditok -- Audio Activity Detection tool
+
+auditok.auditok is a program that can be used for Audio/Acoustic activity detection.
+It can read audio data from audio files as well as from built-in device(s) or standard input
+
+
+@author: Mohamed El Amine SEHILI
+
+@copyright: 2015-2018 Mohamed El Amine SEHILI
+
+@license: GPL v3
+
+@deffield updated: 01 Nov 2018
+'''
+
+import sys
+import os
+
+from optparse import OptionParser, OptionGroup
+from threading import Thread
+import tempfile
+import wave
+import time
+import threading
+import logging
+
+try:
+ import future
+ from queue import Queue, Empty
+except ImportError:
+ if sys.version_info >= (3, 0):
+ from queue import Queue, Empty
+ else:
+ from Queue import Queue, Empty
+
+try:
+ from pydub import AudioSegment
+ WITH_PYDUB = True
+except ImportError:
+ WITH_PYDUB = False
+
+
+from .core import StreamTokenizer
+from .io import PyAudioSource, BufferAudioSource, StdinAudioSource, player_for
+from .util import ADSFactory, AudioEnergyValidator
+from auditok import __version__ as version
+
+# Empty on purpose: ``from auditok.cmdline import *`` exports nothing.
+__all__ = []
+__version__ = version
+__date__ = '2015-11-23'
+# Used by main() to build the string printed by --version.
+__updated__ = '2018-10-06'
+
+# Switches read by the ``__main__`` guard at the bottom of this module:
+# DEBUG appends "-h" to sys.argv, TESTRUN runs doctest.testmod() before
+# main() and PROFILE runs main() under cProfile, then exits.
+DEBUG = 0
+TESTRUN = 1
+PROFILE = 0
+
+# Name of the logger used by main() and, by default, by debugging workers.
+LOGGER_NAME = "AUDITOK_LOGGER"
+
+class AudioFileFormatError(Exception):
+    """Raised when an audio file format cannot be read or written."""
+
+class TimeFormatError(Exception):
+    """Raised when a time format string contains an unknown directive."""
+
+def file_to_audio_source(filename, filetype=None, **kwargs):
+    """
+    Read an audio file and return its data as a `BufferAudioSource`.
+
+    `pydub` is used when it could be imported. Otherwise only raw files and
+    single-channel wave files can be read, using the standard library.
+
+    :Parameters:
+
+        `filename` : *(str)*
+            path of the file to read.
+
+        `filetype` : *(str, default=None)*
+            audio type ("raw", "wav"/"wave", "mp3", "ogg", "flv", ...). If
+            None, the type is guessed from the extension of `filename`.
+
+        `kwargs` :
+            `sampling_rate` (or `sr`), `sample_width` (or `sw`) and
+            `channels` (or `ch`): mandatory for raw data, ignored otherwise.
+            `use_channel` (or `uc`): only with pydub, channel to keep from a
+            multi-channel file. Either a 1-based channel number or one of
+            'left', 'right' and 'mix'. Default: 1.
+
+    :Raises:
+
+        `Exception` if an audio parameter is missing for raw data,
+        `ValueError` if `use_channel` is not valid or if multi-channel audio
+        has to be read without pydub, `AudioFileFormatError` if the file
+        type cannot be read without pydub.
+    """
+
+    lower_fname = filename.lower()
+    rawdata = False
+
+    if filetype is not None:
+        filetype = filetype.lower()
+
+    if filetype == "raw" or (filetype is None and lower_fname.endswith(".raw")):
+
+        srate = kwargs.pop("sampling_rate", None)
+        if srate is None:
+            srate = kwargs.pop("sr", None)
+
+        swidth = kwargs.pop("sample_width", None)
+        if swidth is None:
+            swidth = kwargs.pop("sw", None)
+
+        ch = kwargs.pop("channels", None)
+        if ch is None:
+            ch = kwargs.pop("ch", None)
+
+        if None in (swidth, srate, ch):
+            raise Exception("All audio parameters are required for raw data")
+
+        # audio samples are bytes: read in binary mode and always close the file
+        with open(filename, "rb") as fp:
+            data = fp.read()
+        rawdata = True
+
+    # try first with pydub
+    if WITH_PYDUB:
+
+        use_channel = kwargs.pop("use_channel", None)
+        if use_channel is None:
+            use_channel = kwargs.pop("uc", None)
+
+        if use_channel is None:
+            use_channel = 1
+        else:
+            try:
+                use_channel = int(use_channel)
+            except ValueError:
+                # not a number, may still be 'left', 'right' or 'mix'
+                pass
+
+        if not isinstance(use_channel, int) and use_channel.lower() not in ("left", "right", "mix"):
+            raise ValueError("channel must be an integer or one of 'left', 'right' or 'mix'")
+
+        # `elif` matters here: raw data must not fall through to the file
+        # type chain below, whose last branch would re-read the file with
+        # `AudioSegment.from_file` and discard the raw segment
+        if rawdata:
+            asegment = AudioSegment(data, sample_width=swidth, frame_rate=srate, channels=ch)
+        elif filetype in ("wave", "wav") or (filetype is None and lower_fname.endswith(".wav")):
+            asegment = AudioSegment.from_wav(filename)
+        elif filetype == "mp3" or (filetype is None and lower_fname.endswith(".mp3")):
+            asegment = AudioSegment.from_mp3(filename)
+        elif filetype == "ogg" or (filetype is None and lower_fname.endswith(".ogg")):
+            asegment = AudioSegment.from_ogg(filename)
+        elif filetype == "flv" or (filetype is None and lower_fname.endswith(".flv")):
+            asegment = AudioSegment.from_flv(filename)
+        else:
+            asegment = AudioSegment.from_file(filename)
+
+        if asegment.channels > 1:
+
+            if isinstance(use_channel, int):
+                # channel numbers are 1-based; anything below 1 would be
+                # turned into a negative index and pick the wrong channel
+                if use_channel < 1 or use_channel > asegment.channels:
+                    raise ValueError("Can not use channel '{0}', audio file has only {1} channels".format(use_channel, asegment.channels))
+                asegment = asegment.split_to_mono()[use_channel - 1]
+            else:
+                ch_lower = use_channel.lower()
+
+                if ch_lower == "mix":
+                    asegment = asegment.set_channels(1)
+
+                elif ch_lower == "left":
+                    asegment = asegment.split_to_mono()[0]
+
+                elif ch_lower == "right":
+                    asegment = asegment.split_to_mono()[1]
+
+        return BufferAudioSource(data_buffer=asegment._data,
+                                 sampling_rate=asegment.frame_rate,
+                                 sample_width=asegment.sample_width,
+                                 channels=asegment.channels)
+
+    # fall back to standard python
+    if rawdata:
+        if ch != 1:
+            raise ValueError("Cannot handle multi-channel audio without pydub")
+        return BufferAudioSource(data, srate, swidth, ch)
+
+    if filetype in ("wav", "wave") or (filetype is None and lower_fname.endswith(".wav")):
+
+        wfp = wave.open(filename)
+        try:
+            ch = wfp.getnchannels()
+            if ch != 1:
+                raise ValueError("Cannot handle multi-channel audio without pydub")
+
+            srate = wfp.getframerate()
+            swidth = wfp.getsampwidth()
+            data = wfp.readframes(wfp.getnframes())
+        finally:
+            wfp.close()
+        return BufferAudioSource(data, srate, swidth, ch)
+
+    raise AudioFileFormatError("Cannot read audio file format")
+
+
+def save_audio_data(data, filename, filetype=None, **kwargs):
+    """
+    Write audio data to a file.
+
+    :Parameters:
+
+        `data` : *(bytes)*
+            audio samples to save.
+
+        `filename` : *(str)*
+            path of the file to write.
+
+        `filetype` : *(str, default=None)*
+            "raw", "wav"/"wave" or any format pydub can export. If None,
+            ".raw" and ".wav" file names are recognized by their extension.
+
+        `kwargs` :
+            `sampling_rate` (or `sr`), `sample_width` (or `sw`) and
+            `channels` (or `ch`): mandatory for everything but raw data.
+
+    :Raises:
+
+        `Exception` if an audio parameter is missing for non raw data,
+        `AudioFileFormatError` if the format is neither raw nor wave and
+        pydub is not available.
+    """
+
+    lower_fname = filename.lower()
+    if filetype is not None:
+        filetype = filetype.lower()
+
+    # save raw data
+    if filetype == "raw" or (filetype is None and lower_fname.endswith(".raw")):
+        # audio samples are bytes: text mode would reject them on Python 3
+        with open(filename, "wb") as fp:
+            fp.write(data)
+        return
+
+    # save other types of data
+    # requires all audio parameters
+    srate = kwargs.pop("sampling_rate", None)
+    if srate is None:
+        srate = kwargs.pop("sr", None)
+
+    swidth = kwargs.pop("sample_width", None)
+    if swidth is None:
+        swidth = kwargs.pop("sw", None)
+
+    ch = kwargs.pop("channels", None)
+    if ch is None:
+        ch = kwargs.pop("ch", None)
+
+    if None in (swidth, srate, ch):
+        raise Exception("All audio parameters are required to save no raw data")
+
+    if filetype in ("wav", "wave") or (filetype is None and lower_fname.endswith(".wav")):
+        # use standard python's wave module
+        fp = wave.open(filename, "w")
+        try:
+            fp.setnchannels(ch)
+            fp.setsampwidth(swidth)
+            fp.setframerate(srate)
+            fp.writeframes(data)
+        finally:
+            fp.close()
+
+    elif WITH_PYDUB:
+
+        asegment = AudioSegment(data, sample_width=swidth, frame_rate=srate, channels=ch)
+        asegment.export(filename, format=filetype)
+
+    else:
+        raise AudioFileFormatError("cannot write file format {0} (file name: {1})".format(filetype, filename))
+
+
+def plot_all(signal, sampling_rate, energy_as_amp, detections=(), show=True, save_as=None):
+    """
+    Plot an audio signal, the detection threshold and the detections.
+
+    Requires matplotlib and numpy, which are imported on call.
+
+    :Parameters:
+
+        `signal` :
+            sequence of samples to plot, one value per sample.
+
+        `sampling_rate` :
+            number of samples per second, used to build the time axis.
+
+        `energy_as_amp` :
+            height at which the horizontal threshold line is drawn.
+
+        `detections` :
+            iterable of (start, end) pairs, drawn as vertical spans on the
+            time axis.
+
+        `show` : *(bool, default=True)*
+            if True, display the figure.
+
+        `save_as` : *(str, default=None)*
+            if not None, file the figure is saved to.
+    """
+
+    import matplotlib.pyplot as plt
+    import numpy as np
+
+    # One time stamp per sample. Building it from the sample index (rather
+    # than from a float step) guarantees exactly len(signal) values.
+    t = np.arange(len(signal)) / float(sampling_rate)
+
+    for start, end in detections:
+        plt.axvspan(start, end, facecolor='g', ec='r', lw=2, alpha=0.4)
+
+    plt.axhline(y=energy_as_amp, lw=1, ls="--", c="r", label="Energy threshold as normalized amplitude")
+    plt.plot(t, signal)
+    legend = plt.legend(["Detection threshold"], bbox_to_anchor=(0., 1.02, 1., .102), loc=1, fontsize=16)
+    plt.gca().add_artist(legend)
+
+    plt.xlabel("Time (s)", fontsize=24)
+    plt.ylabel("Amplitude (normalized)", fontsize=24)
+
+    if save_as is not None:
+        plt.savefig(save_as, dpi=120)
+
+    if show:
+        plt.show()
+
+
+def seconds_to_str_fromatter(_format):
+    """
+    Build a function that converts a time in seconds into a string.
+
+    Accepted values for `_format`:
+
+    - "%S": absolute time in seconds, printed with two decimals
+    - "%I": absolute time in milliseconds
+    - any string built with the directives %h (hours), %m (minutes),
+      %s (seconds) and %i (milliseconds), e.g. "%h:%m:%s.%i"
+
+    :Returns:
+
+        a function that takes a number of seconds and returns a string.
+
+    :Raises:
+
+        `TimeFormatError` if `_format` contains any other directive.
+    """
+
+    def _to_millis(seconds):
+        # Round instead of truncating: 1.005 * 1000 is 1004.9999999999999
+        # in floating point and would otherwise lose one millisecond.
+        return int(round(seconds * 1000))
+
+    if _format == "%S":
+        def _fromatter(seconds):
+            return "{:.2f}".format(seconds)
+
+    elif _format == "%I":
+        def _fromatter(seconds):
+            return "{0}".format(_to_millis(seconds))
+
+    else:
+        _format = _format.replace("%h", "{hrs:02d}")
+        _format = _format.replace("%m", "{mins:02d}")
+        _format = _format.replace("%s", "{secs:02d}")
+        _format = _format.replace("%i", "{millis:03d}")
+
+        # any "%" left at this point belongs to an unsupported directive
+        i = _format.find("%")
+        if i != -1:
+            raise TimeFormatError("Unknow time format directive '{0}'".format(_format[i:i+2]))
+
+        def _fromatter(seconds):
+            millis = _to_millis(seconds)
+            hrs, millis = divmod(millis, 3600000)
+            mins, millis = divmod(millis, 60000)
+            secs, millis = divmod(millis, 1000)
+            return _format.format(hrs=hrs, mins=mins, secs=secs, millis=millis)
+
+    return _fromatter
+
+
+
+class Worker(Thread):
+    """
+    Base class of the threads used by the command line tool.
+
+    A worker owns an inbox queue filled with `send` and a second queue used
+    to ask it to stop. Subclasses implement `run`.
+
+    :Parameters:
+
+        `timeout` : *(float, default=0.2)*
+            number of seconds `_get_message` waits for a message.
+
+        `debug` : *(bool, default=False)*
+            if True and no `logger` is given, messages are logged to
+            standard output through the logger named `LOGGER_NAME`.
+
+        `logger` : *(default=None)*
+            logger used by `debug_message`.
+    """
+
+    def __init__(self, timeout=0.2, debug=False, logger=None):
+        self.timeout = timeout
+        self.debug = debug
+        self.logger = logger
+
+        if self.debug and self.logger is None:
+            self.logger = logging.getLogger(LOGGER_NAME)
+            self.logger.setLevel(logging.DEBUG)
+            # All workers share this named logger: adding one handler per
+            # worker would print every message several times.
+            if not self.logger.handlers:
+                self.logger.addHandler(logging.StreamHandler(sys.stdout))
+
+        self._inbox = Queue()
+        self._stop_request = Queue()
+        Thread.__init__(self)
+
+    def debug_message(self, message):
+        """Log `message` at debug level; do nothing without a logger."""
+        if self.logger is not None:
+            self.logger.debug(message)
+
+    def _stop_requested(self):
+        """Return True if a "stop" request is waiting, False otherwise."""
+        try:
+            message = self._stop_request.get_nowait()
+        except Empty:
+            return False
+        return message == "stop"
+
+    def stop(self):
+        """Ask the thread to stop and wait until it has finished."""
+        self._stop_request.put("stop")
+        self.join()
+
+    def send(self, message):
+        """Put `message` in the inbox of the worker."""
+        self._inbox.put(message)
+
+    def _get_message(self):
+        """Return the next message, or None if none arrives within `timeout`."""
+        try:
+            return self._inbox.get(timeout=self.timeout)
+        except Empty:
+            return None
+
+
+class TokenizerWorker(Worker):
+    """
+    Thread that runs the tokenizer and forwards detections to observers.
+
+    The worker passes itself to the tokenizer as data source: its `read`
+    method returns the data read from `ads`, or None once a stop has been
+    requested.
+
+    :Parameters:
+
+        `ads` :
+            audio data source; must provide `open` and `read`.
+
+        `tokenizer` :
+            object with a `tokenize(data_source, callback)` method.
+
+        `analysis_window` : *(float)*
+            duration in seconds of one analysis window, used to convert
+            frame indices into times.
+
+        `observers` : *(list)*
+            objects with a `notify` method. Each detection is sent to them
+            as a dictionary with keys "id", "audio_data", "start", "end",
+            "start_time", "end_time" and "duration". `END_OF_PROCESSING`
+            is sent when tokenization is over.
+    """
+
+    END_OF_PROCESSING = "END_OF_PROCESSING"
+
+    def __init__(self, ads, tokenizer, analysis_window, observers):
+        self.ads = ads
+        self.tokenizer = tokenizer
+        self.analysis_window = analysis_window
+        self.observers = observers
+        self.count = 0
+        Worker.__init__(self)
+
+    def run(self):
+
+        def notify_observers(data, start, end):
+            audio_data = b''.join(data)
+            self.count += 1
+
+            start_time = start * self.analysis_window
+            end_time = (end + 1) * self.analysis_window
+            duration = (end - start + 1) * self.analysis_window
+
+            # every observer gets its own dictionary, observers are free
+            # to modify the message they receive
+            for observer in self.observers:
+                observer.notify({"id": self.count,
+                                 "audio_data": audio_data,
+                                 "start": start,
+                                 "end": end,
+                                 "start_time": start_time,
+                                 "end_time": end_time,
+                                 "duration": duration})
+
+        try:
+            self.ads.open()
+            self.tokenizer.tokenize(data_source=self, callback=notify_observers)
+        finally:
+            # Observers only leave their loop on this message: send it even
+            # if reading or tokenizing failed, otherwise they never finish.
+            for observer in self.observers:
+                observer.notify(TokenizerWorker.END_OF_PROCESSING)
+
+    def add_observer(self, observer):
+        self.observers.append(observer)
+
+    def remove_observer(self, observer):
+        self.observers.remove(observer)
+
+    def read(self):
+        """Return the next block of `ads`, or None if a stop was requested."""
+        if self._stop_requested():
+            return None
+        return self.ads.read()
+
+
+class PlayerWorker(Worker):
+    """
+    Observer thread that plays back the audio data of each detection.
+
+    `player` must provide a `play` method that accepts the audio data.
+    The thread ends when it is asked to stop or when it receives
+    `TokenizerWorker.END_OF_PROCESSING`.
+    """
+
+    def __init__(self, player, timeout=0.2, debug=False, logger=None):
+        self.player = player
+        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
+
+    def run(self):
+        while not self._stop_requested():
+
+            message = self._get_message()
+            if message is None:
+                # nothing received within the timeout, check for a stop again
+                continue
+            if message == TokenizerWorker.END_OF_PROCESSING:
+                break
+
+            audio_data = message.pop("audio_data", None)
+            start_time = message.pop("start_time", None)
+            end_time = message.pop("end_time", None)
+            dur = message.pop("duration", None)
+            _id = message.pop("id", None)
+
+            if audio_data is None:
+                continue
+
+            if self.debug:
+                self.debug_message("[PLAY]: Detection {id} played (start:{start}, end:{end}, dur:{dur})".format(
+                    id=_id,
+                    start="{:5.2f}".format(start_time),
+                    end="{:5.2f}".format(end_time),
+                    dur="{:5.2f}".format(dur)))
+            self.player.play(audio_data)
+
+    def notify(self, message):
+        """Receive a message from the tokenizer thread."""
+        self.send(message)
+
+
+class CommandLineWorker(Worker):
+    """
+    Observer thread that runs a shell command for each detection.
+
+    The audio data of a detection is written to a temporary file, every
+    "$" in `command` is replaced by the name of that file and the result is
+    run with `os.system`. The temporary file is removed afterwards.
+    The thread ends when it is asked to stop or when it receives
+    `TokenizerWorker.END_OF_PROCESSING`.
+    """
+
+    def __init__(self, command, timeout=0.2, debug=False, logger=None):
+        self.command = command
+        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
+
+    def run(self):
+        while True:
+            if self._stop_requested():
+                break
+
+            message = self._get_message()
+            if message is None:
+                continue
+            if message == TokenizerWorker.END_OF_PROCESSING:
+                break
+
+            audio_data = message.pop("audio_data", None)
+            _id = message.pop("id", None)
+            if audio_data is None:
+                continue
+
+            raw_audio_file = tempfile.NamedTemporaryFile(delete=False)
+            try:
+                raw_audio_file.write(audio_data)
+                # Close before running the command: this flushes the data
+                # to disk and lets the command open the file on platforms
+                # that do not allow a file to be opened twice.
+                raw_audio_file.close()
+                cmd = self.command.replace("$", raw_audio_file.name)
+                if self.debug:
+                    self.debug_message("[CMD ]: Detection {id} command: {cmd}".format(id=_id, cmd=cmd))
+                os.system(cmd)
+            finally:
+                raw_audio_file.close()
+                os.unlink(raw_audio_file.name)
+
+    def notify(self, message):
+        """Receive a message from the tokenizer thread."""
+        self.send(message)
+
+
+class TokenSaverWorker(Worker):
+    """
+    Observer thread that saves each detection to its own file.
+
+    :Parameters:
+
+        `name_format` : *(str)*
+            format of file names; {N} is replaced by the id of the
+            detection, {start} and {end} by its start and end time in
+            seconds, printed with two decimals.
+
+        `filetype` : *(str)*
+            audio type passed to `save_audio_data`.
+
+        `kwargs` :
+            extra arguments passed to `save_audio_data` (audio parameters).
+
+    Errors raised while saving are written to standard error and do not
+    stop the thread.
+    """
+
+    def __init__(self, name_format, filetype, timeout=0.2, debug=False, logger=None, **kwargs):
+        self.name_format = name_format
+        self.filetype = filetype
+        self.kwargs = kwargs
+        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
+
+    def run(self):
+        while True:
+            if self._stop_requested():
+                break
+
+            message = self._get_message()
+            if message is None:
+                continue
+            if message == TokenizerWorker.END_OF_PROCESSING:
+                break
+
+            audio_data = message.pop("audio_data", None)
+            start_time = message.pop("start_time", None)
+            end_time = message.pop("end_time", None)
+            _id = message.pop("id", None)
+            if audio_data is None or len(audio_data) == 0:
+                continue
+
+            fname = self.name_format.format(N=_id, start="{:.2f}".format(start_time), end="{:.2f}".format(end_time))
+            try:
+                save_audio_data(audio_data, fname, filetype=self.filetype, **self.kwargs)
+                # log only once the file has really been written
+                if self.debug:
+                    self.debug_message("[SAVE]: Detection {id} saved as {fname}".format(id=_id, fname=fname))
+            except Exception as e:
+                sys.stderr.write(str(e) + "\n")
+
+    def notify(self, message):
+        """Receive a message from the tokenizer thread."""
+        self.send(message)
+
+
+class LogWorker(Worker):
+    """
+    Observer thread that prints and records detections.
+
+    Each detection with non-empty audio data is appended to `detections`
+    as a tuple (id, start, end, start_time, end_time). If
+    `print_detections` is True it is also printed using `output_format`,
+    in which {id}, {start}, {end} and {duration} are available; times are
+    converted to strings with `time_formatter`.
+    The thread ends when it is asked to stop or when it receives
+    `TokenizerWorker.END_OF_PROCESSING`.
+    """
+
+    def __init__(self, print_detections=False, output_format="{start} {end}",
+                 time_formatter=seconds_to_str_fromatter("%S"), timeout=0.2, debug=False, logger=None):
+
+        self.print_detections = print_detections
+        self.output_format = output_format
+        self.time_formatter = time_formatter
+        self.detections = []
+        Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
+
+    def run(self):
+        while not self._stop_requested():
+
+            message = self._get_message()
+            if message is None:
+                # nothing received within the timeout, check for a stop again
+                continue
+            if message == TokenizerWorker.END_OF_PROCESSING:
+                break
+
+            audio_data = message.pop("audio_data", None)
+            _id = message.pop("id", None)
+            start = message.pop("start", None)
+            end = message.pop("end", None)
+            start_time = message.pop("start_time", None)
+            end_time = message.pop("end_time", None)
+            duration = message.pop("duration", None)
+
+            if audio_data is None or len(audio_data) == 0:
+                continue
+
+            if self.debug:
+                self.debug_message("[DET ]: Detection {id} (start:{start}, end:{end})".format(
+                    id=_id,
+                    start="{:5.2f}".format(start_time),
+                    end="{:5.2f}".format(end_time)))
+
+            if self.print_detections:
+                line = self.output_format.format(id=_id,
+                                                 start=self.time_formatter(start_time),
+                                                 end=self.time_formatter(end_time),
+                                                 duration=self.time_formatter(duration))
+                print(line)
+
+            self.detections.append((_id, start, end, start_time, end_time))
+
+    def notify(self, message):
+        """Receive a message from the tokenizer thread."""
+        self.send(message)
+
+
+
+def main(argv=None):
+    '''
+    Command line entry point.
+
+    Parses the options, builds the audio source, the tokenizer and the
+    observer threads (save, play, run a command, print), starts them and
+    waits until they are all done. Depending on the options, the main
+    stream is then saved and/or plotted.
+
+    :Parameters:
+
+        `argv` : *(list, default=None)*
+            command line arguments. If None, `sys.argv[1:]` is used.
+
+    :Returns:
+
+        0 on success or when interrupted from the keyboard, 2 if an
+        exception occurred. Some errors (no audio device, no audio player,
+        wrong file name format) end the program with `sys.exit(2)`.
+    '''
+
+    program_name = os.path.basename(sys.argv[0])
+    program_version = version
+    program_build_date = "%s" % __updated__
+
+    program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
+    program_longdesc = ''''''  # optional - give further explanation about what the program does
+    program_license = ("Copyright 2015-2018 Mohamed El Amine SEHILI "
+                       "Licensed under the General Public License (GPL) Version 3 \nhttp://www.gnu.org/licenses/")
+
+    if argv is None:
+        argv = sys.argv[1:]
+
+    # Bound before the `try` block so that the KeyboardInterrupt handler can
+    # rely on them whenever the interruption happens.
+    observers = []
+    tokenizer_worker = None
+    threads_started = False
+
+    try:
+        # setup option parser
+        parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
+
+        group = OptionGroup(parser, "[Input-Output options]")
+        group.add_option("-i", "--input", dest="input", help="Input audio or video file. Use - for stdin [default: read from microphone using pyaudio]", metavar="FILE")
+        group.add_option("-t", "--input-type", dest="input_type", help="Input audio file type. Mandatory if file name has no extension [default: %default]", type=str, default=None, metavar="String")
+        group.add_option("-M", "--max_time", dest="max_time", help="Max data (in seconds) to read from microphone/file [default: read until the end of file/stream]", type=float, default=None, metavar="FLOAT")
+        group.add_option("-O", "--output-main", dest="output_main", help="Save main stream as. If omitted main stream will not be saved [default: omitted]", type=str, default=None, metavar="FILE")
+        group.add_option("-o", "--output-tokens", dest="output_tokens", help="Output file name format for detections. Use {N} and {start} and {end} to build file names, example: 'Det_{N}_{start}-{end}.wav'", type=str, default=None, metavar="STRING")
+        group.add_option("-T", "--output-type", dest="output_type", help="Audio type used to save detections and/or main stream. If not supplied will: (1). guess from extension or (2). use wav format", type=str, default=None, metavar="STRING")
+        group.add_option("-u", "--use-channel", dest="use_channel", help="Choose channel to use from a multi-channel audio file (requires pydub). 'left', 'right' and 'mix' are accepted values. [Default: 1 (i.e. 1st or left channel)]", type=str, default="1", metavar="STRING")
+        parser.add_option_group(group)
+
+        group = OptionGroup(parser, "[Tokenization options]", "Set tokenizer options and energy threshold.")
+        group.add_option("-a", "--analysis-window", dest="analysis_window", help="Size of analysis window in seconds [default: %default (10ms)]", type=float, default=0.01, metavar="FLOAT")
+        group.add_option("-n", "--min-duration", dest="min_duration", help="Min duration of a valid audio event in seconds [default: %default]", type=float, default=0.2, metavar="FLOAT")
+        group.add_option("-m", "--max-duration", dest="max_duration", help="Max duration of a valid audio event in seconds [default: %default]", type=float, default=5, metavar="FLOAT")
+        group.add_option("-s", "--max-silence", dest="max_silence", help="Max duration of a consecutive silence within a valid audio event in seconds [default: %default]", type=float, default=0.3, metavar="FLOAT")
+        group.add_option("-d", "--drop-trailing-silence", dest="drop_trailing_silence", help="Drop trailing silence from a detection [default: keep trailing silence]", action="store_true", default=False)
+        group.add_option("-e", "--energy-threshold", dest="energy_threshold", help="Log energy threshold for detection [default: %default]", type=float, default=50, metavar="FLOAT")
+        parser.add_option_group(group)
+
+        group = OptionGroup(parser, "[Audio parameters]", "Define audio parameters if data is read from a headerless file (raw or stdin) or you want to use different microphone parameters.")
+        group.add_option("-r", "--rate", dest="sampling_rate", help="Sampling rate of audio data [default: %default]", type=int, default=16000, metavar="INT")
+        group.add_option("-c", "--channels", dest="channels", help="Number of channels of audio data [default: %default]", type=int, default=1, metavar="INT")
+        group.add_option("-w", "--width", dest="sample_width", help="Number of bytes per audio sample [default: %default]", type=int, default=2, metavar="INT")
+        group.add_option("-I", "--input-device-index", dest="input_device_index", help="Audio device index [default: %default] - only when using PyAudio", type=int, default=None, metavar="INT")
+        group.add_option("-F", "--audio-frame-per-buffer", dest="frame_per_buffer", help="Audio frame per buffer [default: %default] - only when using PyAudio", type=int, default=1024, metavar="INT")
+        parser.add_option_group(group)
+
+        group = OptionGroup(parser, "[Do something with detections]", "Use these options to print, play or plot detections.")
+        group.add_option("-C", "--command", dest="command", help="Command to call when an audio detection occurs. Use $ to represent the file name to use with the command (e.g. -C 'du -h $')", default=None, type=str, metavar="STRING")
+        group.add_option("-E", "--echo", dest="echo", help="Play back each detection immediately using pyaudio [default: do not play]", action="store_true", default=False)
+        group.add_option("-p", "--plot", dest="plot", help="Plot and show audio signal and detections (requires matplotlib)", action="store_true", default=False)
+        group.add_option("", "--save-image", dest="save_image", help="Save plotted audio signal and detections as a picture or a PDF file (requires matplotlib)", type=str, default=None, metavar="FILE")
+        group.add_option("", "--printf", dest="printf", help="print detections, one per line, using a user supplied format (e.g. '[{id}]: {start} -- {end}'). Available keywords {id}, {start}, {end} and {duration}", type=str, default="{id} {start} {end}", metavar="STRING")
+        group.add_option("", "--time-format", dest="time_format", help="format used to print {start} and {end}. [Default= %default]. %S: absolute time in sec. %I: absolute time in ms. If at least one of (%h, %m, %s, %i) is used, convert time into hours, minutes, seconds and millis (e.g. %h:%m:%s.%i). Only required fields are printed", type=str, default="%S", metavar="STRING")
+        parser.add_option_group(group)
+
+        parser.add_option("-q", "--quiet", dest="quiet", help="Do not print any information about detections [default: print 'id', 'start' and 'end' of each detection]", action="store_true", default=False)
+        parser.add_option("-D", "--debug", dest="debug", help="Print processing operations to STDOUT", action="store_true", default=False)
+        parser.add_option("", "--debug-file", dest="debug_file", help="Print processing operations to FILE", type=str, default=None, metavar="FILE")
+
+        # process options
+        (opts, args) = parser.parse_args(argv)
+
+        if opts.input == "-":
+            asource = StdinAudioSource(sampling_rate=opts.sampling_rate,
+                                       sample_width=opts.sample_width,
+                                       channels=opts.channels)
+        # read data from a file
+        elif opts.input is not None:
+            # Audio parameters are mandatory for headerless (raw) files;
+            # they are ignored for the other file types.
+            asource = file_to_audio_source(filename=opts.input, filetype=opts.input_type,
+                                           uc=opts.use_channel,
+                                           sr=opts.sampling_rate,
+                                           sw=opts.sample_width,
+                                           ch=opts.channels)
+
+        # read data from microphone via pyaudio
+        else:
+            try:
+                asource = PyAudioSource(sampling_rate=opts.sampling_rate,
+                                        sample_width=opts.sample_width,
+                                        channels=opts.channels,
+                                        frames_per_buffer=opts.frame_per_buffer,
+                                        input_device_index=opts.input_device_index)
+            except Exception:
+                sys.stderr.write("Cannot read data from audio device!\n")
+                sys.stderr.write("You should either install pyaudio or read data from STDIN\n")
+                sys.exit(2)
+
+        logger = logging.getLogger(LOGGER_NAME)
+        logger.setLevel(logging.DEBUG)
+
+        handler = logging.StreamHandler(sys.stdout)
+        if opts.quiet or not opts.debug:
+            # only critical messages will be printed
+            handler.setLevel(logging.CRITICAL)
+        else:
+            handler.setLevel(logging.DEBUG)
+
+        logger.addHandler(handler)
+
+        if opts.debug_file is not None:
+            logger.setLevel(logging.DEBUG)
+            opts.debug = True
+            handler = logging.FileHandler(opts.debug_file, "w")
+            fmt = logging.Formatter('[%(asctime)s] | %(message)s')
+            handler.setFormatter(fmt)
+            handler.setLevel(logging.DEBUG)
+            logger.addHandler(handler)
+
+        # the whole stream must be kept in memory if it is saved or plotted
+        record = opts.output_main is not None or opts.plot or opts.save_image is not None
+
+        ads = ADSFactory.ads(audio_source=asource, block_dur=opts.analysis_window, max_time=opts.max_time, record=record)
+        validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=opts.energy_threshold)
+
+        if opts.drop_trailing_silence:
+            mode = StreamTokenizer.DROP_TRAILING_SILENCE
+        else:
+            mode = 0
+
+        # durations are given in seconds, the tokenizer works with windows
+        analysis_window_per_second = 1. / opts.analysis_window
+        tokenizer = StreamTokenizer(validator=validator, min_length=opts.min_duration * analysis_window_per_second,
+                                    max_length=int(opts.max_duration * analysis_window_per_second),
+                                    max_continuous_silence=opts.max_silence * analysis_window_per_second,
+                                    mode=mode)
+
+        if opts.output_tokens is not None:
+
+            try:
+                # check user format is correct
+                fname = opts.output_tokens.format(N=0, start=0, end=0)
+
+                # find file type for detections
+                tok_type = opts.output_type
+                if tok_type is None:
+                    tok_type = os.path.splitext(opts.output_tokens)[1][1:]
+                if tok_type == "":
+                    tok_type = "wav"
+
+                token_saver = TokenSaverWorker(name_format=opts.output_tokens, filetype=tok_type,
+                                               debug=opts.debug, logger=logger, sr=asource.get_sampling_rate(),
+                                               sw=asource.get_sample_width(),
+                                               ch=asource.get_channels())
+                observers.append(token_saver)
+
+            except Exception:
+                sys.stderr.write("Wrong format for detections file name: '{0}'\n".format(opts.output_tokens))
+                sys.exit(2)
+
+        if opts.echo:
+            try:
+                player = player_for(asource)
+                player_worker = PlayerWorker(player=player, debug=opts.debug, logger=logger)
+                observers.append(player_worker)
+            except Exception:
+                sys.stderr.write("Cannot get an audio player!\n")
+                sys.stderr.write("You should either install pyaudio or supply a command (-C option) to play audio\n")
+                sys.exit(2)
+
+        if opts.command is not None and len(opts.command) > 0:
+            cmd_worker = CommandLineWorker(command=opts.command, debug=opts.debug, logger=logger)
+            observers.append(cmd_worker)
+
+        # The log worker prints detections and records them for the plot.
+        # `opts.plot` is a boolean: it must be tested for truth, not
+        # compared to None.
+        if not opts.quiet or opts.plot or opts.save_image is not None:
+            oformat = opts.printf.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r")
+            converter = seconds_to_str_fromatter(opts.time_format)
+            log_worker = LogWorker(print_detections=not opts.quiet, output_format=oformat,
+                                   time_formatter=converter, logger=logger, debug=opts.debug)
+            observers.append(log_worker)
+
+        tokenizer_worker = TokenizerWorker(ads, tokenizer, opts.analysis_window, observers)
+
+        def _save_main_stream():
+            # find file type
+            main_type = opts.output_type
+            if main_type is None:
+                main_type = os.path.splitext(opts.output_main)[1][1:]
+            if main_type == "":
+                main_type = "wav"
+            ads.close()
+            ads.rewind()
+            data = ads.get_audio_source().get_data_buffer()
+            if len(data) > 0:
+                save_audio_data(data=data, filename=opts.output_main, filetype=main_type, sr=asource.get_sampling_rate(),
+                                sw=asource.get_sample_width(),
+                                ch=asource.get_channels())
+
+        def _plot():
+            import numpy as np
+            ads.close()
+            ads.rewind()
+            data = ads.get_audio_source().get_data_buffer()
+            signal = AudioEnergyValidator._convert(data, asource.get_sample_width())
+            # items 3 and 4 of a recorded detection are its start and end time
+            detections = [(det[3], det[4]) for det in log_worker.detections]
+            max_amplitude = 2**(asource.get_sample_width() * 8 - 1) - 1
+            energy_as_amp = np.sqrt(np.exp(opts.energy_threshold * np.log(10) / 10)) / max_amplitude
+            plot_all(signal / max_amplitude, asource.get_sampling_rate(), energy_as_amp, detections, show=opts.plot, save_as=opts.save_image)
+
+        threads_started = True
+        # start observer threads
+        for obs in observers:
+            obs.start()
+        # start tokenization thread
+        tokenizer_worker.start()
+
+        # wait until the main thread is the only one left
+        while True:
+            time.sleep(1)
+            if len(threading.enumerate()) == 1:
+                break
+
+        tokenizer_worker = None
+
+        if opts.output_main is not None:
+            _save_main_stream()
+        if opts.plot or opts.save_image is not None:
+            _plot()
+
+        return 0
+
+    except KeyboardInterrupt:
+
+        # only a running thread can be stopped and joined
+        if tokenizer_worker is not None and tokenizer_worker.is_alive():
+            tokenizer_worker.stop()
+        for obs in observers:
+            if obs.is_alive():
+                obs.stop()
+
+        # Before the threads are started nothing has been read, and the
+        # options or the helpers below may not even exist yet.
+        if threads_started:
+            if opts.output_main is not None:
+                _save_main_stream()
+            if opts.plot or opts.save_image is not None:
+                _plot()
+
+        return 0
+
+    except Exception as e:
+        sys.stderr.write(program_name + ": " + str(e) + "\n")
+        sys.stderr.write("for help use -h\n")
+
+        return 2
+
+if __name__ == "__main__":
+    # DEBUG: behave as if "-h" had been given on the command line
+    if DEBUG:
+        sys.argv.append("-h")
+    # TESTRUN: run the doctests of this module before the program itself
+    if TESTRUN:
+        import doctest
+        doctest.testmod()
+    # PROFILE: run main() under cProfile, write a report and exit
+    if PROFILE:
+        import cProfile
+        import pstats
+        profile_filename = 'auditok.auditok_profile.txt'
+        cProfile.run('main()', profile_filename)
+        # pstats writes text: the report must be opened in text mode
+        with open("profile_stats.txt", "w") as statsfile:
+            p = pstats.Stats(profile_filename, stream=statsfile)
+            stats = p.strip_dirs().sort_stats('cumulative')
+            stats.print_stats()
+        sys.exit(0)
+    sys.exit(main())
diff --git a/libs/auditok/core.py b/libs/auditok/core.py
new file mode 100644
index 000000000..fa2ab598c
--- /dev/null
+++ b/libs/auditok/core.py
@@ -0,0 +1,437 @@
+"""
+This module gathers processing (i.e. tokenization) classes.
+
+Class summary
+=============
+
+.. autosummary::
+
+ StreamTokenizer
+"""
+
+from auditok.util import DataValidator
+
+__all__ = ["StreamTokenizer"]
+
+
+class StreamTokenizer():
+ """
+ Class for stream tokenizers. It implements a 4-state automaton scheme
+ to extract sub-sequences of interest on the fly.
+
+ :Parameters:
+
+ `validator` :
+ instance of `DataValidator` that implements `is_valid` method.
+
+ `min_length` : *(int)*
+ Minimum number of frames of a valid token. This includes all \
+ tolerated non valid frames within the token.
+
+ `max_length` : *(int)*
+ Maximum number of frames of a valid token. This includes all \
+ tolerated non valid frames within the token.
+
+ `max_continuous_silence` : *(int)*
+ Maximum number of consecutive non-valid frames within a token.
+ Note that, within a valid token, there may be many tolerated \
+ *silent* regions that contain each a number of non valid frames up to \
+ `max_continuous_silence`
+
+ `init_min` : *(int, default=0)*
+ Minimum number of consecutive valid frames that must be **initially** \
+ gathered before any sequence of non valid frames can be tolerated. This
+ option is not always needed, it can be used to drop non-valid tokens as
+ early as possible. **Default = 0** means that the option is by default
+ ineffective.
+
+ `init_max_silence` : *(int, default=0)*
+ Maximum number of tolerated consecutive non-valid frames if the \
+ number already gathered valid frames has not yet reached 'init_min'.
+ This argument is normally used if `init_min` is used. **Default = 0**,
+ by default this argument is not taken into consideration.
+
+ `mode` : *(int, default=0)*
+ `mode` can be:
+
+ 1. `StreamTokenizer.STRICT_MIN_LENGTH`:
+ if token *i* is delivered because `max_length`
+ is reached, and token *i+1* is immediately adjacent to
+ token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
+ at frame *k+1*) then accept token *i+1* only if it has a size of at
+ least `min_length`. The default behavior is to accept token *i+1*
+ even if it is shorter than `min_length` (given that the above conditions
+ are fulfilled of course).
+
+ :Examples:
+
+ In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
+ accepted although it is shorter than `min_length` (3), because it immediately
+ follows the latest delivered token:
+
+ .. code:: python
+
+ from auditok import StreamTokenizer, StringDataSource, DataValidator
+
+ class UpperCaseChecker(DataValidator):
+ def is_valid(self, frame):
+ return frame.isupper()
+
+
+ dsource = StringDataSource("aaaAAAABBbbb")
+ tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+ min_length=3,
+ max_length=4,
+ max_continuous_silence=0)
+
+ tokenizer.tokenize(dsource)
+
+ :output:
+
+ .. code:: python
+
+ [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
+
+
+ The following tokenizer will however reject the 'BB' token:
+
+ .. code:: python
+
+ dsource = StringDataSource("aaaAAAABBbbb")
+ tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+ min_length=3, max_length=4,
+ max_continuous_silence=0,
+ mode=StreamTokenizer.STRICT_MIN_LENGTH)
+ tokenizer.tokenize(dsource)
+
+ :output:
+
+ .. code:: python
+
+ [(['A', 'A', 'A', 'A'], 3, 6)]
+
+
+ 2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing non-valid frames
+ from a token to be delivered if and only if it is not **truncated**.
+ This can be a bit tricky. A token is actually delivered if:
+
+ - a. `max_continuous_silence` is reached
+
+ :or:
+
+ - b. Its length reaches `max_length`. This is called a **truncated** token
+
+ In the current implementation, a `StreamTokenizer`'s decision is only based on already seen
+ data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
+ frame (`max_length` is reached but `max_continuous_silence` not yet) any tailing
+ silence will be kept because it can potentially be part of valid token (if `max_length`
+ was bigger). But if `max_continuous_silence` is reached before `max_length`, the delivered
+ token will not be considered as truncated but a result of *normal* end of detection
+ (i.e. no more valid data). In that case the tailing silence can be removed if you use
+ the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
+
+ :Example:
+
+ .. code:: python
+
+ tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
+ max_length=6, max_continuous_silence=3,
+ mode=StreamTokenizer.DROP_TRAILING_SILENCE)
+
+ dsource = StringDataSource("aaaAAAaaaBBbbbb")
+ tokenizer.tokenize(dsource)
+
+ :output:
+
+ .. code:: python
+
+ [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
+
+ The first token is delivered with its tailing silence because it is truncated
+ while the second one has its tailing frames removed.
+
+ Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:
+
+ .. code:: python
+
+ [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]
+
+
+ 3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
+ use both options. That means: first remove tailing silence, then check if the
+ token still has at least a length of `min_length`.
+ """
+
+ # States of the tokenization automaton; `_process` switches between them
+ # for every frame it receives.
+ SILENCE = 0
+ POSSIBLE_SILENCE = 1
+ POSSIBLE_NOISE = 2
+ NOISE = 3
+
+ # Mode flags accepted by `set_mode`; they can be combined with `|`.
+ STRICT_MIN_LENGTH = 2
+ DROP_TRAILING_SILENCE = 4
+ # alias
+ DROP_TAILING_SILENCE = 4
+
+ def __init__(self, validator,
+              min_length, max_length, max_continuous_silence,
+              init_min=0, init_max_silence=0,
+              mode=0):
+     """
+     Check the arguments and set up the tokenizer's internal state.
+
+     The meaning of each parameter is described in the class documentation.
+
+     :Raises:
+
+     `TypeError` if `validator` is not a `DataValidator` instance.
+     `ValueError` if one of the length arguments is out of range or if
+     `mode` is not a supported value.
+     """
+
+     if not isinstance(validator, DataValidator):
+         raise TypeError("'validator' must be an instance of 'DataValidator'")
+
+     if max_length <= 0:
+         raise ValueError("'max_length' must be > 0 (value={0})".format(max_length))
+
+     if min_length <= 0 or min_length > max_length:
+         raise ValueError("'min_length' must be > 0 and <= 'max_length' (value={0})".format(min_length))
+
+     if max_continuous_silence >= max_length:
+         raise ValueError("'max_continuous_silence' must be < 'max_length' (value={0})".format(max_continuous_silence))
+
+     if init_min >= max_length:
+         # report the offending argument itself (the message used to show
+         # the value of `max_continuous_silence`)
+         raise ValueError("'init_min' must be < 'max_length' (value={0})".format(init_min))
+
+     self.validator = validator
+     self.min_length = min_length
+     self.max_length = max_length
+     self.max_continuous_silence = max_continuous_silence
+     self.init_min = init_min
+     # NOTE: the attribute is named `init_max_silent` (not `..._silence`);
+     # `_process` reads it under that name.
+     self.init_max_silent = init_max_silence
+
+     # set_mode() validates `mode` and derives `_strict_min_length` and
+     # `_drop_tailing_silence`, so they are not recomputed here.
+     self._mode = None
+     self.set_mode(mode)
+
+     # per-run state, (re)initialized by `_reinitialize` at each `tokenize`
+     self._deliver = None
+     self._tokens = None
+     self._state = None
+     self._data = None
+     self._contiguous_token = False
+
+     self._init_count = 0
+     self._silence_length = 0
+     self._start_frame = 0
+     self._current_frame = 0
+
+ def set_mode(self, mode):
+     """
+     Change the tokenization mode.
+
+     :Parameters:
+
+         `mode` : *(int)*
+             New mode, one of:
+
+             - `0`
+
+             - `StreamTokenizer.STRICT_MIN_LENGTH`
+
+             - `StreamTokenizer.DROP_TRAILING_SILENCE`
+
+             - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`
+
+     Any other value raises `ValueError`.
+     See `StreamTokenizer.__init__` for more information about the mode.
+     """
+     strict_flag = self.STRICT_MIN_LENGTH
+     drop_flag = self.DROP_TRAILING_SILENCE
+
+     if mode not in (strict_flag, drop_flag, strict_flag | drop_flag, 0):
+         raise ValueError("Wrong value for mode")
+
+     self._mode = mode
+     # cache each flag as a boolean so the automaton need not test bits
+     self._strict_min_length = bool(mode & strict_flag)
+     self._drop_tailing_silence = bool(mode & drop_flag)
+
+ def get_mode(self):
+ """
+ Return the current mode, i.e. the value last accepted by `set_mode`.
+
+ To check whether a specific mode is activated use the bitwise 'and'
+ operator `&`. The parentheses are required because `!=` binds more
+ tightly than `&`. Example:
+
+ .. code:: python
+
+     mode = tokenizer.get_mode()
+     if (mode & StreamTokenizer.STRICT_MIN_LENGTH) != 0:
+         do_something()
+ """
+ return self._mode
+
+ def _reinitialize(self):
+     """Reset the per-run state; called at the start of `tokenize`."""
+     # fresh containers: delivered tokens and frames of the token in progress
+     self._tokens = []
+     self._data = []
+     # -1 because `tokenize` increments the counter before processing a frame
+     self._current_frame = -1
+     self._state = self.SILENCE
+     self._contiguous_token = False
+     # default delivery function: accumulate tokens in `self._tokens`
+     self._deliver = self._append_token
+
+ def tokenize(self, data_source, callback=None):
+     """
+     Read frames from `data_source`, one at a time, until it is exhausted and
+     detect the sequences of frames that make up valid tokens.
+
+     :Parameters:
+         `data_source` : instance of the :class:`DataSource` class that implements a `read` method.
+             `read` should return a slice of signal, i.e. a frame (of whatever
+             type as long as it can be processed by the validator) and None
+             when there is no more signal.
+
+         `callback` : an optional 3-argument function.
+             If given, it is called as `callback(data, start, end)` each time
+             a valid token is found, and nothing is returned.
+
+     :Returns:
+         A list of tokens if `callback` is None. Each token is a tuple with the following elements:
+
+         .. code:: python
+
+             (data, start, end)
+
+         where `data` is a list of read frames, `start` the index of the first
+         frame in the original data and `end` the index of the last frame.
+     """
+     self._reinitialize()
+
+     collect_tokens = callback is None
+     if not collect_tokens:
+         self._deliver = callback
+
+     frame = data_source.read()
+     while frame is not None:
+         self._current_frame += 1
+         self._process(frame)
+         frame = data_source.read()
+
+     # flush a token that is still open when the source runs dry
+     self._post_process()
+
+     if not collect_tokens:
+         return None
+
+     # hand the list over and drop the internal reference to it
+     tokens, self._tokens = self._tokens, None
+     return tokens
+
+ def _process(self, frame):
+ """
+ Feed one frame to the automaton.
+
+ The frame is first classified with `self.validator.is_valid`; what
+ happens next depends on the current state (SILENCE, POSSIBLE_NOISE,
+ NOISE or POSSIBLE_SILENCE). Frames of the token in progress are
+ gathered in `self._data`. `_process_end_of_detection` is called with
+ True when the token has reached `max_length` (truncated token) and
+ without argument when it ends on non-valid frames.
+ """
+
+ frame_is_valid = self.validator.is_valid(frame)
+
+ # SILENCE: nothing gathered yet, waiting for a first valid frame
+ if self._state == self.SILENCE:
+
+ if frame_is_valid:
+ # seems we got a valid frame after a silence
+ self._init_count = 1
+ self._silence_length = 0
+ self._start_frame = self._current_frame
+ self._data.append(frame)
+
+ if self._init_count >= self.init_min:
+ self._state = self.NOISE
+ if len(self._data) >= self.max_length:
+ self._process_end_of_detection(True)
+ else:
+ self._state = self.POSSIBLE_NOISE
+
+ # POSSIBLE_NOISE: fewer than `init_min` valid frames gathered so far
+ elif self._state == self.POSSIBLE_NOISE:
+
+ if frame_is_valid:
+ self._silence_length = 0
+ self._init_count += 1
+ self._data.append(frame)
+ if self._init_count >= self.init_min:
+ self._state = self.NOISE
+ if len(self._data) >= self.max_length:
+ self._process_end_of_detection(True)
+
+ else:
+ self._silence_length += 1
+ if self._silence_length > self.init_max_silent or \
+ len(self._data) + 1 >= self.max_length:
+ # either init_max_silent or max_length is reached
+ # before _init_count, back to silence
+ self._data = []
+ self._state = self.SILENCE
+ else:
+ self._data.append(frame)
+
+ # NOISE: inside a token, the previous frame was valid
+ elif self._state == self.NOISE:
+
+ if frame_is_valid:
+ self._data.append(frame)
+ if len(self._data) >= self.max_length:
+ self._process_end_of_detection(True)
+
+ elif self.max_continuous_silence <= 0:
+ # max token reached at this frame will _deliver if _contiguous_token
+ # and not _strict_min_length
+ self._process_end_of_detection()
+ self._state = self.SILENCE
+
+ else:
+ # this is the first silent frame following a valid one
+ # and it is tolerated
+ self._silence_length = 1
+ self._data.append(frame)
+ self._state = self.POSSIBLE_SILENCE
+ if len(self._data) == self.max_length:
+ self._process_end_of_detection(True)
+ # don't reset _silence_length because we still
+ # need to know the total number of silent frames
+
+ # POSSIBLE_SILENCE: inside a token, gathering tolerated non-valid frames
+ elif self._state == self.POSSIBLE_SILENCE:
+
+ if frame_is_valid:
+ self._data.append(frame)
+ self._silence_length = 0
+ self._state = self.NOISE
+ if len(self._data) >= self.max_length:
+ self._process_end_of_detection(True)
+
+ else:
+ if self._silence_length >= self.max_continuous_silence:
+ if self._silence_length < len(self._data):
+ # _deliver only gathered frames aren't all silent
+ self._process_end_of_detection()
+ else:
+ self._data = []
+ self._state = self.SILENCE
+ self._silence_length = 0
+ else:
+ self._data.append(frame)
+ self._silence_length += 1
+ if len(self._data) >= self.max_length:
+ self._process_end_of_detection(True)
+ # don't reset _silence_length because we still
+ # need to know the total number of silent frames
+
+ def _post_process(self):
+     """Deliver the token still in progress once the data source is exhausted."""
+     if self._state not in (self.NOISE, self.POSSIBLE_SILENCE):
+         return
+
+     gathered = len(self._data)
+     # something was gathered and it is more than the current silent run
+     if gathered > 0 and gathered > self._silence_length:
+         self._process_end_of_detection()
+
+ def _process_end_of_detection(self, truncated=False):
+     """
+     Close the token in progress and deliver it if it qualifies.
+
+     `truncated` is True when the token is closed because it reached
+     `max_length`. In that case trailing silence is kept and the next token
+     is flagged as contiguous with this one.
+     """
+     if self._drop_tailing_silence and not truncated and self._silence_length > 0:
+         # happens if max_continuous_silence is reached
+         # or max_length is reached at a silent frame
+         self._data = self._data[:-self._silence_length]
+
+     size = len(self._data)
+     # a short token is still accepted when it immediately follows a
+     # truncated one, unless STRICT_MIN_LENGTH is set
+     deliverable = size >= self.min_length or (
+         size > 0 and not self._strict_min_length and self._contiguous_token)
+
+     if not deliverable:
+         self._contiguous_token = False
+     else:
+         self._deliver(self._data, self._start_frame, self._start_frame + size - 1)
+         if truncated:
+             # next token (if any) will start at _current_frame + 1 and is
+             # contiguous with the one just delivered
+             self._start_frame = self._current_frame + 1
+             self._contiguous_token = True
+         else:
+             self._contiguous_token = False
+
+     self._data = []
+
+ def _append_token(self, data, start, end):
+     """Default delivery function: store the token in `self._tokens`."""
+     token = (data, start, end)
+     self._tokens.append(token)
diff --git a/libs/auditok/data/1to6arabic_16000_mono_bc_noise.wav b/libs/auditok/data/1to6arabic_16000_mono_bc_noise.wav
new file mode 100644
index 000000000..3339b8a2c
--- /dev/null
+++ b/libs/auditok/data/1to6arabic_16000_mono_bc_noise.wav
Binary files differ
diff --git a/libs/auditok/data/was_der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_silence.wav b/libs/auditok/data/was_der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_silence.wav
new file mode 100644
index 000000000..b3056b91a
--- /dev/null
+++ b/libs/auditok/data/was_der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_silence.wav
Binary files differ
diff --git a/libs/auditok/dataset.py b/libs/auditok/dataset.py
new file mode 100644
index 000000000..dbee8f61e
--- /dev/null
+++ b/libs/auditok/dataset.py
@@ -0,0 +1,18 @@
+"""
+This module contains links to audio files you can use for test purposes.
+"""
+
+import os
+
+__all__ = ["one_to_six_arabic_16000_mono_bc_noise", "was_der_mensch_saet_mono_44100_lead_trail_silence"]
+
+# directory that contains this module; the audio files live in its "data" sub-directory
+_current_dir = os.path.dirname(os.path.realpath(__file__))
+
+one_to_six_arabic_16000_mono_bc_noise = os.path.sep.join(
+    (_current_dir, "data", "1to6arabic_16000_mono_bc_noise.wav"))
+"""A wave file that contains a pronunciation of Arabic numbers from 1 to 6"""
+
+was_der_mensch_saet_mono_44100_lead_trail_silence = os.path.sep.join(
+    (_current_dir, "data",
+     "was_der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_silence.wav"))
+""" A wave file that contains a sentence between long leading and trailing periods of silence"""
diff --git a/libs/auditok/exceptions.py b/libs/auditok/exceptions.py
new file mode 100644
index 000000000..f3d0354b0
--- /dev/null
+++ b/libs/auditok/exceptions.py
@@ -0,0 +1,3 @@
+
+class DuplicateArgument(Exception):
+    """Package-specific error type; the code that raises it lives outside this module."""
diff --git a/libs/auditok/io.py b/libs/auditok/io.py
new file mode 100644
index 000000000..772147f1c
--- /dev/null
+++ b/libs/auditok/io.py
@@ -0,0 +1,517 @@
+"""
+Module for low-level audio input-output operations.
+
+Class summary
+=============
+
+.. autosummary::
+
+ AudioSource
+ Rewindable
+ BufferAudioSource
+ WaveAudioSource
+ PyAudioSource
+ StdinAudioSource
+ PyAudioPlayer
+
+
+Function summary
+================
+
+.. autosummary::
+
+ from_file
+ player_for
+"""
+
+from abc import ABCMeta, abstractmethod
+import wave
+import sys
+
+__all__ = ["AudioSource", "Rewindable", "BufferAudioSource", "WaveAudioSource",
+ "PyAudioSource", "StdinAudioSource", "PyAudioPlayer", "from_file", "player_for"]
+
+# Default audio parameters, used as default argument values by the sources
+# and the player defined in this module.
+DEFAULT_SAMPLE_RATE = 16000  # samples per second
+DEFAULT_SAMPLE_WIDTH = 2  # bytes per sample
+DEFAULT_NB_CHANNELS = 1  # mono, the only channel count AudioSource accepts
+
+
+class AudioSource():
+    """
+    Base class for audio source objects.
+
+    Subclasses should implement methods to open/close an audio stream
+    and to read the desired amount of audio samples.
+
+    :Parameters:
+
+        `sampling_rate` : int
+            Number of samples per second of audio stream. Default = 16000.
+
+        `sample_width` : int
+            Size in bytes of one audio sample. Possible values : 1, 2, 4.
+            Default = 2.
+
+        `channels` : int
+            Number of channels of audio stream. The current version supports
+            only mono audio streams (i.e. one channel).
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
+
+        if sample_width not in (1, 2, 4):
+            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
+
+        if channels != 1:
+            raise ValueError("Only mono audio is currently handled")
+
+        self._channels = channels
+        self._sample_width = sample_width
+        self._sampling_rate = sampling_rate
+
+    # ---- stream life cycle, to be provided by subclasses ----
+
+    @abstractmethod
+    def is_open(self):
+        """ Tell whether the audio source is currently open """
+
+    @abstractmethod
+    def open(self):
+        """ Open the audio source """
+
+    @abstractmethod
+    def close(self):
+        """ Close the audio source """
+
+    @abstractmethod
+    def read(self, size):
+        """
+        Read and return at most `size` audio samples.
+
+        :Parameters:
+
+            `size` : int
+                the number of samples to read.
+
+        :Returns:
+
+            Audio data as a string of length 'N' * 'sample_width' * 'channels', where 'N' is:
+
+            - `size` if `size` < 'left_samples'
+
+            - 'left_samples' if `size` > 'left_samples'
+        """
+
+    # ---- sampling rate ----
+
+    @property
+    def sampling_rate(self):
+        """ Samples per second of the audio stream """
+        return self._sampling_rate
+
+    @property
+    def sr(self):
+        """ Samples per second of the audio stream (short name) """
+        return self._sampling_rate
+
+    def get_sampling_rate(self):
+        """ Method-style accessor, returns the `sampling_rate` property """
+        return self.sampling_rate
+
+    # ---- sample width ----
+
+    @property
+    def sample_width(self):
+        """ Bytes used to represent one audio sample """
+        return self._sample_width
+
+    @property
+    def sw(self):
+        """ Bytes used to represent one audio sample (short name) """
+        return self._sample_width
+
+    def get_sample_width(self):
+        """ Method-style accessor, returns the `sample_width` property """
+        return self.sample_width
+
+    # ---- channels ----
+
+    @property
+    def channels(self):
+        """ Number of channels of this audio source """
+        return self._channels
+
+    @property
+    def ch(self):
+        """ Number of channels of this audio source (short name) """
+        return self.channels
+
+    def get_channels(self):
+        """ Method-style accessor, returns the `channels` property """
+        return self.channels
+
+
+class Rewindable():
+    """
+    Base class for rewindable audio streams.
+
+    Subclasses should implement a method to go back to the beginning of the
+    stream, and methods to query or set an absolute position expressed
+    either in seconds or in number of samples.
+    """
+
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
+    def rewind(self):
+        """ Return to the beginning of the audio stream """
+
+    @abstractmethod
+    def get_position(self):
+        """ Return the number of samples read so far """
+
+    @abstractmethod
+    def get_time_position(self):
+        """ Return the duration, in seconds, of the data read so far """
+
+    @abstractmethod
+    def set_position(self, position):
+        """ Move to an absolute position
+
+        :Parameters:
+
+            `position` : int
+                number of samples to skip from the start of the stream
+        """
+
+    @abstractmethod
+    def set_time_position(self, time_position):
+        """ Move to an absolute position expressed in seconds
+
+        :Parameters:
+
+            `time_position` : float
+                seconds to skip from the start of the stream
+        """
+
+
+class BufferAudioSource(AudioSource, Rewindable):
+    """
+    An :class:`AudioSource` that encapsulates and reads data from a memory buffer.
+    It implements methods from :class:`Rewindable` and is therefore a navigable :class:`AudioSource`.
+
+    `data_buffer` may be None, in which case the source is empty until data
+    is supplied with `set_data` or `append_data`.
+    """
+
+    def __init__(self, data_buffer,
+                 sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
+
+        # Validate the audio parameters first: an invalid `sample_width`
+        # (e.g. 0) then raises ValueError instead of failing in the modulo below.
+        AudioSource.__init__(self, sampling_rate, sample_width, channels)
+
+        if data_buffer is not None:
+            self._check_length(data_buffer)
+
+        self._buffer = data_buffer
+        self._index = 0
+        self._left = 0 if self._buffer is None else len(self._buffer)
+        self._is_open = False
+
+    def _check_length(self, data_buffer):
+        """ Raise ValueError unless `data_buffer` holds a whole number of samples """
+        if len(data_buffer) % (self.sample_width * self.channels) != 0:
+            raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
+
+    def is_open(self):
+        return self._is_open
+
+    def open(self):
+        self._is_open = True
+
+    def close(self):
+        """ Mark the source as closed and go back to the beginning of the buffer """
+        self._is_open = False
+        self.rewind()
+
+    def read(self, size):
+        """
+        Return at most `size` samples from the current position, or None when
+        no data is left. Raises IOError if the source is not open.
+        """
+        if not self._is_open:
+            raise IOError("Stream is not open")
+
+        if self._left > 0:
+
+            to_read = size * self.sample_width * self.channels
+            if to_read > self._left:
+                to_read = self._left
+
+            data = self._buffer[self._index: self._index + to_read]
+            self._index += to_read
+            self._left -= to_read
+
+            return data
+
+        return None
+
+    def get_data_buffer(self):
+        """ Return all audio data as one string buffer. """
+        return self._buffer
+
+    def set_data(self, data_buffer):
+        """ Set new data for this audio stream and go back to position 0.
+
+        :Parameters:
+
+            `data_buffer` : str, basestring, Bytes
+                a string buffer with a length multiple of (sample_width * channels),
+                or None for an empty source
+        """
+        if data_buffer is not None:
+            self._check_length(data_buffer)
+        self._buffer = data_buffer
+        self._index = 0
+        self._left = 0 if self._buffer is None else len(self._buffer)
+
+    def append_data(self, data_buffer):
+        """ Append data to this audio stream
+
+        :Parameters:
+
+            `data_buffer` : str, basestring, Bytes
+                a buffer with a length multiple of (sample_width * channels)
+        """
+        self._check_length(data_buffer)
+
+        if self._buffer is None:
+            # nothing stored yet: the appended data becomes the buffer
+            self._buffer = data_buffer
+        else:
+            self._buffer += data_buffer
+        self._left += len(data_buffer)
+
+    def rewind(self):
+        self.set_position(0)
+
+    def get_position(self):
+        """ Return the number of already read samples, as an integer """
+        # floor division: `/` would return a float on Python 3
+        return self._index // self.sample_width
+
+    def get_time_position(self):
+        """ Return the duration in seconds of already read data """
+        return float(self._index) / (self.sample_width * self.sampling_rate)
+
+    def set_position(self, position):
+        """
+        Move to the sample number `position`; a position past the end of the
+        buffer moves to the end. Raises ValueError if `position` is negative.
+        """
+        if position < 0:
+            raise ValueError("position must be >= 0")
+
+        if self._buffer is None:
+            self._index = 0
+            self._left = 0
+            return
+
+        # samples -> bytes
+        position *= self.sample_width
+        self._index = position if position < len(self._buffer) else len(self._buffer)
+        self._left = len(self._buffer) - self._index
+
+    def set_time_position(self, time_position):  # time in seconds
+        position = int(self.sampling_rate * time_position)
+        self.set_position(position)
+
+
+class WaveAudioSource(AudioSource):
+    """
+    A class for an `AudioSource` that reads data from a wave file.
+
+    :Parameters:
+
+        `filename` :
+            path to a valid wave file
+    """
+
+    def __init__(self, filename):
+
+        self._filename = filename
+        self._audio_stream = None
+
+        # The file is opened once only to read its audio parameters.
+        # `AudioSource.__init__` raises ValueError for unsupported files
+        # (e.g. stereo), so close the handle in `finally` to avoid leaking it.
+        stream = wave.open(self._filename)
+        try:
+            AudioSource.__init__(self, stream.getframerate(),
+                                 stream.getsampwidth(),
+                                 stream.getnchannels())
+        finally:
+            stream.close()
+
+    def is_open(self):
+        return self._audio_stream is not None
+
+    def open(self):
+        """ Open the wave file for reading; does nothing if it is already open """
+        if self._audio_stream is None:
+            self._audio_stream = wave.open(self._filename)
+
+    def close(self):
+        if self._audio_stream is not None:
+            self._audio_stream.close()
+            self._audio_stream = None
+
+    def read(self, size):
+        """
+        Return at most `size` frames read from the file, or None when the end
+        of the file is reached. Raises IOError if the source is not open.
+        """
+        if self._audio_stream is None:
+            raise IOError("Stream is not open")
+
+        data = self._audio_stream.readframes(size)
+        if data is None or len(data) < 1:
+            return None
+        return data
+
+
+class PyAudioSource(AudioSource):
+    """
+    A class for an `AudioSource` that reads data from an audio input device
+    (e.g. the built-in microphone) using PyAudio.
+    """
+
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS,
+                 frames_per_buffer=1024,
+                 input_device_index=None):
+
+        AudioSource.__init__(self, sampling_rate, sample_width, channels)
+        self._chunk_size = frames_per_buffer
+        self.input_device_index = input_device_index
+
+        # imported here so that the module can be loaded without PyAudio
+        import pyaudio
+        self._pyaudio_object = pyaudio.PyAudio()
+        self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width)
+        self._audio_stream = None
+
+    def is_open(self):
+        return self._audio_stream is not None
+
+    def open(self):
+        """ Open an input-only PyAudio stream with this source's parameters """
+        stream_options = dict(format=self._pyaudio_format,
+                              channels=self.channels,
+                              rate=self.sampling_rate,
+                              input=True,
+                              output=False,
+                              input_device_index=self.input_device_index,
+                              frames_per_buffer=self._chunk_size)
+        self._audio_stream = self._pyaudio_object.open(**stream_options)
+
+    def close(self):
+        """ Stop and close the stream, if there is one """
+        stream = self._audio_stream
+        if stream is None:
+            return
+        stream.stop_stream()
+        stream.close()
+        self._audio_stream = None
+
+    def read(self, size):
+        """
+        Read `size` frames from the device. Return None if the stream is not
+        active or if no data was read. Raises IOError if the source is not open.
+        """
+        stream = self._audio_stream
+        if stream is None:
+            raise IOError("Stream is not open")
+
+        if not stream.is_active():
+            return None
+
+        data = stream.read(size)
+        if data is not None and len(data) >= 1:
+            return data
+        return None
+
+
+class StdinAudioSource(AudioSource):
+    """
+    A class for an :class:`AudioSource` that reads data from standard input.
+    """
+
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
+
+        AudioSource.__init__(self, sampling_rate, sample_width, channels)
+        self._is_open = False
+
+    def is_open(self):
+        return self._is_open
+
+    def open(self):
+        self._is_open = True
+
+    def close(self):
+        self._is_open = False
+
+    def read(self, size):
+        """
+        Read at most `size` samples from standard input. Return None when
+        nothing could be read. Raises IOError if the source is not open.
+        """
+        if not self._is_open:
+            raise IOError("Stream is not open")
+
+        nb_bytes = size * self.sample_width * self.channels
+        # on Python 3 the raw bytes are available through `sys.stdin.buffer`
+        if sys.version_info >= (3, 0):
+            stream = sys.stdin.buffer
+        else:
+            stream = sys.stdin
+        data = stream.read(nb_bytes)
+
+        if data is not None and len(data) >= 1:
+            return data
+        return None
+
+
+class PyAudioPlayer():
+    """
+    A class for audio playback using PyAudio. The output stream is opened
+    as soon as the player is created.
+    """
+
+    def __init__(self, sampling_rate=DEFAULT_SAMPLE_RATE,
+                 sample_width=DEFAULT_SAMPLE_WIDTH,
+                 channels=DEFAULT_NB_CHANNELS):
+        if sample_width not in (1, 2, 4):
+            raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
+
+        self.sampling_rate = sampling_rate
+        self.sample_width = sample_width
+        self.channels = channels
+
+        import pyaudio
+        self._p = pyaudio.PyAudio()
+        sample_format = self._p.get_format_from_width(self.sample_width)
+        self.stream = self._p.open(format=sample_format,
+                                   channels=self.channels, rate=self.sampling_rate,
+                                   input=False, output=True)
+
+    def play(self, data):
+        """ Play `data` chunk by chunk, then stop the stream """
+        output = self.stream
+        if output.is_stopped():
+            output.start_stream()
+
+        for piece in self._chunk_data(data):
+            self.stream.write(piece)
+
+        self.stream.stop_stream()
+
+    def stop(self):
+        """ Stop the stream if needed, close it and terminate PyAudio """
+        if not self.stream.is_stopped():
+            self.stream.stop_stream()
+        self.stream.close()
+        self._p.terminate()
+
+    def _chunk_data(self, data):
+        """ Yield successive slices of `data` """
+        # make audio chunks of 100 ms to allow interruption (like ctrl+c)
+        bytes_per_second = self.sampling_rate * self.sample_width * self.channels
+        chunk_size = int(bytes_per_second / 10)
+        offset = 0
+        while offset < len(data):
+            yield data[offset: offset + chunk_size]
+            offset += chunk_size
+
+
+def from_file(filename):
+    """
+    Create an `AudioSource` object for the audio file `filename`.
+    The :class:`AudioSource` class to use is chosen from the file's
+    extension; only ".wav" (case-insensitive) is recognized.
+
+    :Parameters:
+
+        `filename` :
+            path to an audio file.
+
+    :Returns:
+
+        an `AudioSource` object that reads data from the given file.
+
+    :Raises:
+
+        `Exception` if the extension is not recognized.
+    """
+    is_wave = filename.lower().endswith(".wav")
+    if not is_wave:
+        raise Exception("Can not create an AudioSource object from '%s'" % (filename))
+
+    return WaveAudioSource(filename)
+
+
+def player_for(audio_source):
+    """
+    Build a :class:`PyAudioPlayer` suited to play data read from `audio_source`.
+
+    :Parameters:
+
+        `audio_source` :
+            an `AudioSource` object.
+
+    :Returns:
+
+        `PyAudioPlayer` that has the same sampling rate, sample width and number of channels
+        as `audio_source`.
+    """
+    rate = audio_source.get_sampling_rate()
+    width = audio_source.get_sample_width()
+    nb_channels = audio_source.get_channels()
+    return PyAudioPlayer(rate, width, nb_channels)
diff --git a/libs/auditok/util.py b/libs/auditok/util.py
new file mode 100644
index 000000000..9bf9c8cf9
--- /dev/null
+++ b/libs/auditok/util.py
@@ -0,0 +1,843 @@
+"""
+Class summary
+=============
+
+.. autosummary::
+
+ DataSource
+ StringDataSource
+ ADSFactory
+ ADSFactory.AudioDataSource
+ ADSFactory.ADSDecorator
+ ADSFactory.OverlapADS
+ ADSFactory.LimiterADS
+ ADSFactory.RecorderADS
+ DataValidator
+ AudioEnergyValidator
+
+"""
+
+from abc import ABCMeta, abstractmethod
+import math
+from array import array
+from .io import Rewindable, from_file, BufferAudioSource, PyAudioSource
+from .exceptions import DuplicateArgument
+import sys
+
+try:
+ import numpy
+ _WITH_NUMPY = True
+except ImportError as e:
+ _WITH_NUMPY = False
+
+try:
+ from builtins import str
+ basestring = str
+except ImportError as e:
+ if sys.version_info >= (3, 0):
+ basestring = str
+
+__all__ = ["DataSource", "DataValidator", "StringDataSource", "ADSFactory", "AudioEnergyValidator"]
+
+
class DataSource(object):
    """
    Base class for objects passed to :func:`auditok.core.StreamTokenizer.tokenize`.
    Subclasses should implement a :func:`DataSource.read` method.
    """
    # Python 2 style metaclass declaration (has no effect under Python 3)
    __metaclass__ = ABCMeta

    @abstractmethod
    def read(self):
        """
        Read a piece of data from this source.
        If no more data is available, return None.
        """
+
+
class DataValidator(object):
    """
    Base class for a validator object used by :class:`.core.StreamTokenizer` to check
    if read data is valid.
    Subclasses should implement the :func:`is_valid` method.
    """
    # Python 2 style metaclass declaration (has no effect under Python 3)
    __metaclass__ = ABCMeta

    @abstractmethod
    def is_valid(self, data):
        """
        Check whether `data` is valid.
        """
+
+
class StringDataSource(DataSource):
    """
    A :class:`DataSource` backed by a string buffer.
    Each call to :func:`DataSource.read` returns one character and moves one step forward.
    Once the end of the buffer is reached, :func:`read` returns None.

    :Parameters:

        `data` :
            a basestring object.

    """

    def __init__(self, data):

        self._data = None
        self._current = 0
        self.set_data(data)

    def read(self):
        """
        Read one character from the buffer and advance the read position.

        :Returns:

            Current character or None if end of buffer is reached
        """

        position = self._current
        if position < len(self._data):
            self._current = position + 1
            return self._data[position]
        return None

    def set_data(self, data):
        """
        Replace the data buffer and move the read position back to its start.

        :Parameters:

            `data` : a basestring object
                New data buffer.

        Raises `ValueError` if `data` is not a basestring.
        """

        if isinstance(data, basestring):
            self._data = data
            self._current = 0
            return
        raise ValueError("data must an instance of basestring")
+
+
+class ADSFactory:
+ """
+ Factory class that makes it easy to create an :class:`ADSFactory.AudioDataSource` object that implements
+ :class:`DataSource` and can therefore be passed to :func:`auditok.core.StreamTokenizer.tokenize`.
+
+ Whether you read audio data from a file, the microphone or a memory buffer, this factory
+ instantiates and returns the right :class:`ADSFactory.AudioDataSource` object.
+
+ There are many other features you want your :class:`ADSFactory.AudioDataSource` object to have, such as:
+ memorize all read audio data so that you can rewind and reuse it (especially useful when
+ reading data from the microphone), read a fixed amount of data (also useful when reading
+ from the microphone), read overlapping audio frames (often needed when doing a spectral
+ analysis of data).
+
+ :func:`ADSFactory.ads` automatically creates and returns an object with the desired behavior according
+ to the supplied keyword arguments.
+ """
+
@staticmethod
def _check_normalize_args(kwargs):
    """
    Validate the keyword arguments of :func:`ADSFactory.ads` and normalize
    them in place.

    After the call, `kwargs` always contains the short keys "bd", "hd", "bs",
    "hs", "mt", "asrc", "fn", "db" (None when not supplied) and "rec" (a
    bool). Arguments meant for the audio source constructors keep their long
    names ("frames_per_buffer", "sampling_rate", "sample_width", "channels")
    and are only present if one of their two spellings was supplied.

    :Parameters:

        `kwargs` : *(dict)*
            keyword arguments to check; modified in place.

    Raises `ValueError` for an unknown keyword, `DuplicateArgument` when both
    the long and the short spelling of an argument are given, and `TypeError`
    when `record` is not a boolean.
    """

    # Arguments normalized to their short name (always present afterwards)
    to_short = (("block_dur", "bd"), ("hop_dur", "hd"), ("block_size", "bs"),
                ("hop_size", "hs"), ("max_time", "mt"), ("audio_source", "asrc"),
                ("filename", "fn"), ("data_buffer", "db"))

    # Arguments forwarded to BufferAudioSource and PyAudioSource: keep long names
    to_long = (("frames_per_buffer", "fpb"), ("sampling_rate", "sr"),
               ("sample_width", "sw"), ("channels", "ch"))

    record_names = ("record", "rec")

    all_pairs = to_short + to_long + (record_names,)
    valid_names = set()
    for pair in all_pairs:
        valid_names.update(pair)

    for k in kwargs:
        if k not in valid_names:
            raise ValueError("Invalid argument: {0}".format(k))

    for long_name, short_name in all_pairs:
        if long_name in kwargs and short_name in kwargs:
            raise DuplicateArgument("Either '{0}' or '{1}' must be specified, not both".format(long_name,
                                                                                                short_name))

    for long_name, short_name in to_short:
        kwargs[short_name] = kwargs.pop(long_name, None) or kwargs.pop(short_name, None)

    record = kwargs.pop("record", False)
    if not record:
        record = kwargs.pop("rec", False)
        if not isinstance(record, bool):
            raise TypeError("'record' must be a boolean")

    kwargs["rec"] = record

    for long_name, short_name in to_long:
        if long_name in kwargs or short_name in kwargs:
            kwargs[long_name] = kwargs.pop(long_name, None) or kwargs.pop(short_name, None)
+
@staticmethod
def ads(**kwargs):
    """
    Create and return an :class:`ADSFactory.AudioDataSource`. The type and behavior of the object is the result
    of the supplied parameters.

    :Parameters:

    *No parameters* :
       read audio data from the available built-in microphone with the default parameters.
       The returned :class:`ADSFactory.AudioDataSource` encapsulates an :class:`io.PyAudioSource` object and hence
       it accepts the next four parameters, which are used instead of their default values.

    `sampling_rate`, `sr` : *(int)*
        number of samples per second. Default = 16000.

    `sample_width`, `sw` : *(int)*
        number of bytes per sample (must be in (1, 2, 4)). Default = 2

    `channels`, `ch` : *(int)*
        number of audio channels. Default = 1 (only this value is currently accepted)

    `frames_per_buffer`, `fpb` : *(int)*
        number of samples of PyAudio buffer. Default = 1024.

    `audio_source`, `asrc` : an `AudioSource` object
        read data from this audio source

    `filename`, `fn` : *(string)*
        build an `io.AudioSource` object using this file (currently only wave format is supported)

    `data_buffer`, `db` : *(string)*
        build an `io.BufferAudioSource` using data in `data_buffer`. If this keyword is used,
        `sampling_rate`, `sample_width` and `channels` are passed to `io.BufferAudioSource`
        constructor and used instead of default values.

    `max_time`, `mt` : *(float)*
        maximum time (in seconds) to read. Default behavior: read until there is no more data
        available.

    `record`, `rec` : *(bool)*
        save all read data in cache. Provide a navigable object which boasts a `rewind` method.
        Default = False.

    `block_dur`, `bd` : *(float)*
        processing block duration in seconds. This represents the quantity of audio data to return
        each time the :func:`read` method is invoked. If `block_dur` is 0.025 (i.e. 25 ms) and the sampling
        rate is 8000 and the sample width is 2 bytes, :func:`read` returns a buffer of 0.025 * 8000 * 2 = 400
        bytes at most. This parameter will be looked for (and used if available) before `block_size`.
        If neither parameter is given, `block_dur` will be set to 0.01 second (i.e. 10 ms)

    `hop_dur`, `hd` : *(float)*
        quantity of data to skip from current processing window. If `hop_dur` is supplied then there
        will be an overlap of `block_dur` - `hop_dur` between two adjacent blocks. This
        parameter will be looked for (and used if available) before `hop_size`. If neither parameter
        is given, `hop_dur` will be set to `block_dur` which means that there will be no overlap
        between two consecutively read blocks.

    `block_size`, `bs` : *(int)*
        number of samples to read each time the `read` method is called. Default: a block size
        that represents a window of 10ms, so for a sampling rate of 16000, the default `block_size`
        is 160 samples, for a rate of 44100, `block_size` = 441 samples, etc.

    `hop_size`, `hs` : *(int)*
        determines the number of overlapping samples between two adjacent read windows. For a
        `hop_size` of value *N*, the overlap is `block_size` - *N*. Default : `hop_size` = `block_size`,
        means that there is no overlap.

    :Returns:

    An AudioDataSource object that has the desired features.

    :Raises:

    `ValueError` for an unknown keyword or a `hop_size` that is not in [1, `block_size`];
    `DuplicateArgument` when an argument is given twice (long and short spelling, or both the
    duration and the size form of block/hop); `TypeError` when `record` is not a boolean;
    `Warning` (raised, not merely emitted) when more than one of `audio_source`, `filename`
    and `data_buffer` is supplied.

    :Examples:

    1. **Create an AudioDataSource that reads data from the microphone (requires Pyaudio) with default audio parameters:**

    .. code:: python

        from auditok import ADSFactory
        ads = ADSFactory.ads()
        ads.get_sampling_rate()
        16000
        ads.get_sample_width()
        2
        ads.get_channels()
        1

    2. **Create an AudioDataSource that reads data from the microphone with a sampling rate of 48KHz:**

    .. code:: python

        from auditok import ADSFactory
        ads = ADSFactory.ads(sr=48000)
        ads.get_sampling_rate()
        48000

    3. **Create an AudioDataSource that reads data from a wave file:**

    .. code:: python

        import auditok
        from auditok import ADSFactory
        ads = ADSFactory.ads(fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
        ads.get_sampling_rate()
        44100
        ads.get_sample_width()
        2
        ads.get_channels()
        1

    4. **Define size of read blocks as 20 ms**

    .. code:: python

        import auditok
        from auditok import ADSFactory
        '''
        we know sampling rate for previous file is 44100 samples/second
        so 10 ms are equivalent to 441 samples and 20 ms to 882
        '''
        block_size = 882
        ads = ADSFactory.ads(bs = 882, fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
        ads.open()
        # read one block
        data = ads.read()
        ads.close()
        len(data)
        1764
        assert len(data) == ads.get_sample_width() * block_size

    5. **Define block size as a duration (use block_dur or bd):**

    .. code:: python

        import auditok
        from auditok import ADSFactory
        dur = 0.25 # second
        ads = ADSFactory.ads(bd = dur, fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
        '''
        we know sampling rate for previous file is 44100 samples/second
        for a block duration of 250 ms, block size should be 0.25 * 44100 = 11025
        '''
        ads.get_block_size()
        11025
        assert ads.get_block_size() == int(0.25 * 44100)
        ads.open()
        # read one block
        data = ads.read()
        ads.close()
        len(data)
        22050
        assert len(data) == ads.get_sample_width() * ads.get_block_size()

    6. **Read overlapping blocks (one of hop_size, hs, hop_dur or hd > 0):**

    For better readability we'd better use :class:`auditok.io.BufferAudioSource` with a string buffer:

    .. code:: python

        import auditok
        from auditok import ADSFactory
        '''
        we supply a data buffer instead of a file (keyword 'data_buffer' or 'db')
        sr : sampling rate = 16 samples/sec
        sw : sample width = 1 byte
        ch : channels = 1
        '''
        buffer = "abcdefghijklmnop" # 16 bytes = 1 second of data
        bd = 0.250 # block duration = 250 ms = 4 bytes
        hd = 0.125 # hop duration = 125 ms = 2 bytes
        ads = ADSFactory.ads(db = "abcdefghijklmnop", bd = bd, hd = hd, sr = 16, sw = 1, ch = 1)
        ads.open()
        ads.read()
        'abcd'
        ads.read()
        'cdef'
        ads.read()
        'efgh'
        ads.read()
        'ghij'
        data = ads.read()
        assert data == 'ijkl'

    7. **Limit amount of read data (use max_time or mt):**

    .. code:: python

        '''
        We know audio file is larger than 2.25 seconds
        We want to read up to 2.25 seconds of audio data
        '''
        ads = ADSFactory.ads(mt = 2.25, fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
        ads.open()
        data = []
        while True:
            d = ads.read()
            if d is None:
                break
            data.append(d)

        ads.close()
        data = b''.join(data)
        assert len(data) == int(ads.get_sampling_rate() * 2.25 * ads.get_sample_width() * ads.get_channels())
    """

    # copy user's dictionary (shallow copy) so that normalization never alters it
    kwargs = kwargs.copy()

    # check and normalize keyword arguments
    ADSFactory._check_normalize_args(kwargs)

    block_dur = kwargs.pop("bd")
    hop_dur = kwargs.pop("hd")
    block_size = kwargs.pop("bs")
    hop_size = kwargs.pop("hs")
    max_time = kwargs.pop("mt")
    audio_source = kwargs.pop("asrc")
    filename = kwargs.pop("fn")
    data_buffer = kwargs.pop("db")
    record = kwargs.pop("rec")

    # NOTE: the two Warning instances below are raised as exceptions (historical
    # behavior kept for compatibility), so ambiguous input aborts the call.

    # Case 1: an audio source is supplied
    if audio_source is not None:
        if filename is not None or data_buffer is not None:
            raise Warning("You should provide one of 'audio_source', 'filename' or 'data_buffer' "
                          "keyword parameters. 'audio_source' will be used")

    # Case 2: a file name is supplied
    elif filename is not None:
        if data_buffer is not None:
            raise Warning("You should provide one of 'filename' or 'data_buffer' "
                          "keyword parameters. 'filename' will be used")
        audio_source = from_file(filename)

    # Case 3: a data_buffer is supplied
    elif data_buffer is not None:
        audio_source = BufferAudioSource(data_buffer=data_buffer, **kwargs)

    # Case 4: try to access native audio input
    else:
        audio_source = PyAudioSource(**kwargs)

    if block_dur is not None:
        if block_size is not None:
            raise DuplicateArgument("Either 'block_dur' or 'block_size' can be specified, not both")
        block_size = int(audio_source.get_sampling_rate() * block_dur)
    elif block_size is None:
        # Set default block_size to 10 ms
        block_size = int(audio_source.get_sampling_rate() / 100)

    # Instantiate base AudioDataSource
    ads = ADSFactory.AudioDataSource(audio_source=audio_source, block_size=block_size)

    # Limit data to be read
    if max_time is not None:
        ads = ADSFactory.LimiterADS(ads=ads, max_time=max_time)

    # Record, rewind and reuse data
    if record:
        ads = ADSFactory.RecorderADS(ads=ads)

    # Read overlapping blocks of data
    if hop_dur is not None:
        if hop_size is not None:
            raise DuplicateArgument("Either 'hop_dur' or 'hop_size' can be specified, not both")
        hop_size = int(audio_source.get_sampling_rate() * hop_dur)

    if hop_size is not None:
        if hop_size <= 0 or hop_size > block_size:
            raise ValueError("hop_size must be > 0 and <= block_size")
        # hop_size == block_size means no overlap: no decorator needed
        if hop_size < block_size:
            ads = ADSFactory.OverlapADS(ads=ads, hop_size=hop_size)

    return ads
+
class AudioDataSource(DataSource):
    """
    Base class for AudioDataSource objects.
    It is a DataSource that wraps an AudioSource object and reads
    `block_size` units from it on every call to `read`.
    """

    def __init__(self, audio_source, block_size):

        self.audio_source = audio_source
        self.block_size = block_size

    # --- block size and wrapped source accessors ---

    def get_block_size(self):
        return self.block_size

    def set_block_size(self, size):
        self.block_size = size

    def get_audio_source(self):
        return self.audio_source

    def set_audio_source(self, audio_source):
        self.audio_source = audio_source

    # --- calls forwarded to the wrapped audio source ---

    def open(self):
        self.audio_source.open()

    def close(self):
        self.audio_source.close()

    def is_open(self):
        return self.audio_source.is_open()

    def get_sampling_rate(self):
        return self.audio_source.get_sampling_rate()

    def get_sample_width(self):
        return self.audio_source.get_sample_width()

    def get_channels(self):
        return self.audio_source.get_channels()

    def is_rewindable(self):
        return isinstance(self.audio_source, Rewindable)

    def rewind(self):
        # Only sources implementing Rewindable can be rewound
        if not isinstance(self.audio_source, Rewindable):
            raise Exception("Audio source is not rewindable")
        self.audio_source.rewind()

    def read(self):
        # Ask the wrapped source for one block
        source = self.audio_source
        return source.read(self.block_size)
+
class ADSDecorator(AudioDataSource):
    """
    Base decorator class for AudioDataSource objects.

    A decorator wraps another AudioDataSource (`ads`) and forwards most calls
    to it. Subclasses must implement `_reinit`, which resets their internal
    state whenever the wrapped source is rewound or replaced.
    """
    __metaclass__ = ABCMeta

    def __init__(self, ads):
        self.ads = ads

        # Forward these calls straight to the wrapped object. They are bound as
        # instance attributes, so they take precedence over methods of the same
        # name defined on the class (including `open` below).
        self.get_block_size = self.ads.get_block_size
        self.set_block_size = self.ads.set_block_size
        self.get_audio_source = self.ads.get_audio_source
        self.open = self.ads.open
        self.close = self.ads.close
        self.is_open = self.ads.is_open
        self.get_sampling_rate = self.ads.get_sampling_rate
        self.get_sample_width = self.ads.get_sample_width
        self.get_channels = self.ads.get_channels

    def is_rewindable(self):
        """
        Return True if the wrapped object reports itself as rewindable.
        """
        # Call the wrapped method: returning the bound method itself would
        # always be truthy, whatever the wrapped source is.
        return self.ads.is_rewindable()

    def rewind(self):
        """
        Rewind the wrapped object, then reset this decorator's state.
        """
        self.ads.rewind()
        self._reinit()

    def set_audio_source(self, audio_source):
        """
        Replace the audio source of the wrapped object, then reset this
        decorator's state.
        """
        self.ads.set_audio_source(audio_source)
        self._reinit()

    def open(self):
        # NOTE: on instances this method is shadowed by the `self.open`
        # attribute assigned in `__init__`, so `instance.open()` calls the
        # wrapped object's `open` directly and does not run `_reinit`.
        if not self.ads.is_open():
            self.ads.open()
            self._reinit()

    @abstractmethod
    def _reinit(self):
        """
        Reset the decorator's internal state. Implemented by subclasses.
        """
        pass
+
class OverlapADS(ADSDecorator):
    """
    A class for AudioDataSource objects that can read and return overlapping
    audio frames.

    The first call to `read` returns a whole block. Each following call reads
    only `hop_size` new units from the wrapped object and prepends the tail of
    the previous block that was kept in cache.

    :Parameters:

        `ads` :
            the AudioDataSource object to wrap.

        `hop_size` : *(int)*
            number of new units read per block after the first one; must be
            between 1 and the block size of `ads` (both inclusive).
    """

    def __init__(self, ads, hop_size):
        ADSFactory.ADSDecorator.__init__(self, ads)

        if hop_size <= 0 or hop_size > self.get_block_size():
            raise ValueError("hop_size must be either 'None' or "
                             "between 1 and block_size (both inclusive)")
        self.hop_size = hop_size
        # Remember the real block size: the wrapped object's block size is
        # switched to `hop_size` after the first read, so `get_block_size`
        # (forwarded to the wrapped object) then reports `hop_size`.
        self._actual_block_size = self.get_block_size()
        self._reinit()

    def _get_block_size(self):
        # Block size as seen by the user, regardless of the wrapped object's
        # current block size
        return self._actual_block_size

    def _read_first_block(self):
        # For the first call, we need an entire block of size 'block_size'
        block = self.ads.read()
        if block is None:
            return None

        # Keep a slice of data in cache and append it in the next call
        if len(block) > self._hop_size_bytes:
            self._cache = block[self._hop_size_bytes:]

        # Up from the next call, we will use '_read_next_blocks'
        # and we only read 'hop_size'
        self.ads.set_block_size(self.hop_size)
        self.read = self._read_next_blocks

        return block

    def _read_next_blocks(self):
        block = self.ads.read()
        if block is None:
            return None

        # Prepend cached data to ensure overlap. The cache is None when the
        # previous block was too short to leave anything to overlap with.
        if self._cache is not None:
            block = self._cache + block
        # Keep a slice of data in cache only if we have a full length block
        # if we don't that means that this is the last block
        if len(block) == self._block_size_bytes:
            self._cache = block[self._hop_size_bytes:]
        else:
            self._cache = None

        return block

    def read(self):
        # Placeholder: `_reinit` binds `read` on the instance to
        # `_read_first_block`, which later rebinds it to `_read_next_blocks`.
        pass

    def _reinit(self):
        # Back to the initial state: empty cache, full block size on the
        # wrapped object and next read handled by `_read_first_block`
        self._cache = None
        self.ads.set_block_size(self._actual_block_size)
        self._hop_size_bytes = self.hop_size * \
            self.get_sample_width() * \
            self.get_channels()
        self._block_size_bytes = self.get_block_size() * \
            self.get_sample_width() * \
            self.get_channels()
        self.read = self._read_first_block
+
class LimiterADS(ADSDecorator):
    """
    A class for AudioDataSource objects that stop delivering data once a
    fixed amount (`max_time` seconds) has been read.
    This can be useful when reading data from the microphone or from large audio files.
    """

    def __init__(self, ads, max_time):
        ADSFactory.ADSDecorator.__init__(self, ads)

        self.max_time = max_time
        self._reinit()

    def read(self):
        # Quota already used up: behave as an exhausted source
        quota_left = self._total_read_bytes < self._max_read_bytes
        if not quota_left:
            return None

        block = self.ads.read()
        if block is not None:
            self._total_read_bytes += len(block)
            # Close as soon as the quota is reached
            if self._total_read_bytes >= self._max_read_bytes:
                self.close()

        return block

    def _reinit(self):
        # Quota in bytes: whole number of samples in `max_time` seconds
        # times the size of one sample on all channels
        max_samples = int(self.max_time * self.get_sampling_rate())
        bytes_per_sample = self.get_sample_width() * self.get_channels()
        self._max_read_bytes = max_samples * bytes_per_sample
        self._total_read_bytes = 0
+
class RecorderADS(ADSDecorator):
    """
    A class for AudioDataSource objects that keep a copy of all audio data
    they read, so that it can be replayed after a call to `rewind`.
    """

    def __init__(self, ads):
        ADSFactory.ADSDecorator.__init__(self, ads)

        self._reinit()

    def read(self):
        # Placeholder: `read` is bound on the instance to `_read_and_rec`
        # or `_read_simple` by `_reinit` and `rewind`
        pass

    def _read_and_rec(self):
        # Read one block and keep a copy of it
        block = self.ads.read()
        if block is None:
            return block
        self._cache.append(block)
        return block

    def _read_simple(self):
        # Read without recording
        return self.ads.read()

    def rewind(self):
        if not self._record:
            # Already replaying recorded data: rewind it and reopen if needed
            self.ads.rewind()
            if not self.is_open():
                self.open()
            return

        # Was recording: replace the audio source by a BufferAudioSource
        # built from everything recorded so far, and stop recording
        recorded = self._concatenate(self._cache)
        replay_source = BufferAudioSource(recorded, self.get_sampling_rate(),
                                          self.get_sample_width(),
                                          self.get_channels())

        self.set_audio_source(replay_source)
        self.open()
        self._cache = []
        self._record = False
        self.read = self._read_simple

    def is_rewindable(self):
        return True

    def _reinit(self):
        # when audio_source is replaced, start recording again
        self._record = True
        self._cache = []
        self.read = self._read_and_rec

    def _concatenate(self, data):
        # Join as bytes first; fall back to a text join when the elements
        # are 'str' objects under python 3
        try:
            joined = b''.join(data)
        except TypeError:
            joined = ''.join(data)
        return joined
+
+
class AudioEnergyValidator(DataValidator):
    """
    The most basic auditok audio frame validator.
    This validator computes the log energy of an input audio frame
    and returns True if the result is >= a given threshold, False
    otherwise.

    :Parameters:

    `sample_width` : *(int)*
        Number of bytes of one audio sample. This is used to convert data from `basestring` or `Bytes` to
        an array of floats.

    `energy_threshold` : *(float)*
        A threshold used to check whether an input data buffer is valid.
    """

    # Two implementations of the same three helpers: a numpy based one, used
    # when numpy could be imported, and a pure python fallback.
    if _WITH_NUMPY:
        _formats = {1: numpy.int8, 2: numpy.int16, 4: numpy.int32}

        @staticmethod
        def _convert(signal, sample_width):
            # Interpret raw data as signed integers, then convert to floats
            return numpy.array(numpy.frombuffer(signal, dtype=AudioEnergyValidator._formats[sample_width]),
                               dtype=numpy.float64)

        @staticmethod
        def _signal_energy(signal):
            # Mean of squared samples; an empty signal has no energy
            if len(signal) == 0:
                return 0.
            return float(numpy.dot(signal, signal)) / len(signal)

        @staticmethod
        def _signal_log_energy(signal):
            energy = AudioEnergyValidator._signal_energy(signal)
            # log10 is undefined for 0: use a very low value instead
            if energy <= 0:
                return -200
            return 10. * numpy.log10(energy)

    else:
        _formats = {1: 'b', 2: 'h', 4: 'i'}

        @staticmethod
        def _convert(signal, sample_width):
            # Interpret raw data as signed integers, then convert to floats
            return array("d", array(AudioEnergyValidator._formats[sample_width], signal))

        @staticmethod
        def _signal_energy(signal):
            # Mean of squared samples; an empty signal has no energy
            if len(signal) == 0:
                return 0.
            energy = 0.
            for a in signal:
                energy += a * a
            return energy / len(signal)

        @staticmethod
        def _signal_log_energy(signal):
            energy = AudioEnergyValidator._signal_energy(signal)
            # log10 is undefined for 0: use a very low value instead
            if energy <= 0:
                return -200
            return 10. * math.log10(energy)

    def __init__(self, sample_width, energy_threshold=45):
        self.sample_width = sample_width
        self._energy_threshold = energy_threshold

    def is_valid(self, data):
        """
        Check if data is valid. Audio data will be converted into an array (of
        signed values) of which the log energy is computed. Log energy is computed
        as follows:

        .. code:: python

            arr = AudioEnergyValidator._convert(signal, sample_width)
            energy = float(numpy.dot(arr, arr)) / len(arr)
            log_energy = 10. * numpy.log10(energy)

        A buffer with no energy (silence or empty data) gets a log energy of -200.

        :Parameters:

        `data` : either a *string* or a *Bytes* buffer
            `data` is converted into a numerical array using the `sample_width`
            given in the constructor.

        :Returns:

        True if `log_energy` >= `energy_threshold`, False otherwise.
        """

        signal = AudioEnergyValidator._convert(data, self.sample_width)
        return AudioEnergyValidator._signal_log_energy(signal) >= self._energy_threshold

    def get_energy_threshold(self):
        return self._energy_threshold

    def set_energy_threshold(self, threshold):
        self._energy_threshold = threshold