Diffstat (limited to 'libs/auditok')
-rw-r--r--  libs/auditok/__init__.py        10
-rwxr-xr-x  libs/auditok/cmdline.py       1155
-rwxr-xr-x  libs/auditok/cmdline_util.py   126
-rw-r--r--  libs/auditok/core.py          1656
-rw-r--r--  libs/auditok/dataset.py         24
-rw-r--r--  libs/auditok/exceptions.py      42
-rw-r--r--  libs/auditok/io.py            1264
-rwxr-xr-x  libs/auditok/plotting.py       150
-rw-r--r--  libs/auditok/signal.py         179
-rw-r--r--  libs/auditok/signal_numpy.py    30
-rw-r--r--  libs/auditok/util.py          1734
-rwxr-xr-x  libs/auditok/workers.py        427
12 files changed, 4572 insertions(+), 2225 deletions(-)
diff --git a/libs/auditok/__init__.py b/libs/auditok/__init__.py
index 4ea697b77..edd336cc3 100644
--- a/libs/auditok/__init__.py
+++ b/libs/auditok/__init__.py
@@ -2,20 +2,16 @@
:author:
Amine SEHILI <[email protected]>
-2015-2016
+2015-2021
:License:
-This package is published under GNU GPL Version 3.
+This package is published under the MIT license.
"""
-from __future__ import absolute_import
from .core import *
from .io import *
from .util import *
-from . import dataset
from .exceptions import *
-__version__ = "0.1.5"
-
-
+__version__ = "0.2.0"
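The bumped version re-exports the new public API (load, split, AudioRegion) through `from .core import *`. A minimal usage sketch of that API, assuming an existing file "audio.wav" (the file name and parameter values are illustrative):

    import auditok

    # load a whole file into an AudioRegion
    region = auditok.load("audio.wav")
    print(region.duration)  # duration in seconds

    # split the same file into audio events (split() returns a
    # generator of AudioRegion objects)
    for event in auditok.split("audio.wav", min_dur=0.2, max_dur=5,
                               max_silence=0.3):
        print(event.duration)
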
diff --git a/libs/auditok/cmdline.py b/libs/auditok/cmdline.py
index b6a51d11b..7e7450762 100755
--- a/libs/auditok/cmdline.py
+++ b/libs/auditok/cmdline.py
@@ -1,789 +1,428 @@
#!/usr/bin/env python
# encoding: utf-8
-'''
-auditok.auditok -- Audio Activity Detection tool
-
-auditok.auditok is a program that can be used for Audio/Acoustic activity detection.
-It can read audio data from audio files as well as from built-in device(s) or standard input
+"""
+`auditok` -- An Audio Activity Detection tool
+`auditok` is a program that can be used for Audio/Acoustic
+activity detection. It can read audio data from audio files as well
+as from the microphone or standard input.
@author: Mohamed El Amine SEHILI
-
-@copyright: 2015 Mohamed El Amine SEHILI
-
-@license: GPL v3
-
+@copyright: 2015-2021 Mohamed El Amine SEHILI
+@license: MIT
-@deffield updated: 02 Dec 2015
-'''
+@deffield updated: 01 Mar 2021
+"""
import sys
import os
-
-from optparse import OptionParser, OptionGroup
-from threading import Thread
-import tempfile
-import wave
+from argparse import ArgumentParser
import time
import threading
-import logging
-try:
- import future
- from queue import Queue, Empty
-except ImportError:
- if sys.version_info >= (3, 0):
- from queue import Queue, Empty
- else:
- from Queue import Queue, Empty
+from auditok import __version__, AudioRegion
+from .util import AudioDataSource
+from .exceptions import EndOfProcessing, AudioEncodingWarning
+from .io import player_for
+from .cmdline_util import make_logger, make_kwargs, initialize_workers
+from . import workers
-try:
- from pydub import AudioSegment
- WITH_PYDUB = True
-except ImportError:
- WITH_PYDUB = False
-
-
-from .core import StreamTokenizer
-from .io import PyAudioSource, BufferAudioSource, StdinAudioSource, player_for
-from .util import ADSFactory, AudioEnergyValidator
-from auditok import __version__ as version
__all__ = []
-__version__ = version
-__date__ = '2015-11-23'
-__updated__ = '2015-03-11'
-
-DEBUG = 0
-TESTRUN = 1
-PROFILE = 0
-
-LOGGER_NAME = "AUDITOK_LOGGER"
-
-class AudioFileFormatError(Exception):
- pass
-
-class TimeFormatError(Exception):
- pass
-
-def file_to_audio_source(filename, filetype=None, **kwargs):
-
- lower_fname = filename.lower()
- rawdata = False
-
- if filetype is not None:
- filetype = filetype.lower()
-
- if filetype == "raw" or (filetype is None and lower_fname.endswith(".raw")):
-
- srate = kwargs.pop("sampling_rate", None)
- if srate is None:
- srate = kwargs.pop("sr", None)
-
- swidth = kwargs.pop("sample_width", None)
- if swidth is None:
- swidth = kwargs.pop("sw", None)
-
- ch = kwargs.pop("channels", None)
- if ch is None:
- ch = kwargs.pop("ch", None)
-
- if None in (swidth, srate, ch):
- raise Exception("All audio parameters are required for raw data")
-
- data = open(filename).read()
- rawdata = True
-
- # try first with pydub
- if WITH_PYDUB:
-
- use_channel = kwargs.pop("use_channel", None)
- if use_channel is None:
- use_channel = kwargs.pop("uc", None)
-
- if use_channel is None:
- use_channel = 1
- else:
- try:
- use_channel = int(use_channel)
- except ValueError:
- pass
-
- if not isinstance(use_channel, (int)) and not use_channel.lower() in ["left", "right", "mix"] :
- raise ValueError("channel must be an integer or one of 'left', 'right' or 'mix'")
-
- asegment = None
-
- if rawdata:
- asegment = AudioSegment(data, sample_width=swidth, frame_rate=srate, channels=ch)
- if filetype in("wave", "wav") or (filetype is None and lower_fname.endswith(".wav")):
- asegment = AudioSegment.from_wav(filename)
- elif filetype == "mp3" or (filetype is None and lower_fname.endswith(".mp3")):
- asegment = AudioSegment.from_mp3(filename)
- elif filetype == "ogg" or (filetype is None and lower_fname.endswith(".ogg")):
- asegment = AudioSegment.from_ogg(filename)
- elif filetype == "flv" or (filetype is None and lower_fname.endswith(".flv")):
- asegment = AudioSegment.from_flv(filename)
- else:
- asegment = AudioSegment.from_file(filename)
-
- if asegment.channels > 1:
-
- if isinstance(use_channel, int):
- if use_channel > asegment.channels:
- raise ValueError("Can not use channel '{0}', audio file has only {1} channels".format(use_channel, asegment.channels))
- else:
- asegment = asegment.split_to_mono()[use_channel - 1]
- else:
- ch_lower = use_channel.lower()
-
- if ch_lower == "mix":
- asegment = asegment.set_channels(1)
-
- elif use_channel.lower() == "left":
- asegment = asegment.split_to_mono()[0]
-
- elif use_channel.lower() == "right":
- asegment = asegment.split_to_mono()[1]
-
- return BufferAudioSource(data_buffer = asegment._data,
- sampling_rate = asegment.frame_rate,
- sample_width = asegment.sample_width,
- channels = asegment.channels)
- # fall back to standard python
- else:
- if rawdata:
- if ch != 1:
- raise ValueError("Cannot handle multi-channel audio without pydub")
- return BufferAudioSource(data, srate, swidth, ch)
-
- if filetype in ("wav", "wave") or (filetype is None and lower_fname.endswith(".wav")):
-
- wfp = wave.open(filename)
-
- ch = wfp.getnchannels()
- if ch != 1:
- wfp.close()
- raise ValueError("Cannot handle multi-channel audio without pydub")
-
- srate = wfp.getframerate()
- swidth = wfp.getsampwidth()
- data = wfp.readframes(wfp.getnframes())
- wfp.close()
- return BufferAudioSource(data, srate, swidth, ch)
-
- raise AudioFileFormatError("Cannot read audio file format")
-
-
-def save_audio_data(data, filename, filetype=None, **kwargs):
-
- lower_fname = filename.lower()
- if filetype is not None:
- filetype = filetype.lower()
-
- # save raw data
- if filetype == "raw" or (filetype is None and lower_fname.endswith(".raw")):
- fp = open(filename, "w")
- fp.write(data)
- fp.close()
- return
-
- # save other types of data
- # requires all audio parameters
- srate = kwargs.pop("sampling_rate", None)
- if srate is None:
- srate = kwargs.pop("sr", None)
-
- swidth = kwargs.pop("sample_width", None)
- if swidth is None:
- swidth = kwargs.pop("sw", None)
-
- ch = kwargs.pop("channels", None)
- if ch is None:
- ch = kwargs.pop("ch", None)
-
- if None in (swidth, srate, ch):
- raise Exception("All audio parameters are required to save no raw data")
-
- if filetype in ("wav", "wave") or (filetype is None and lower_fname.endswith(".wav")):
- # use standard python's wave module
- fp = wave.open(filename, "w")
- fp.setnchannels(ch)
- fp.setsampwidth(swidth)
- fp.setframerate(srate)
- fp.writeframes(data)
- fp.close()
-
- elif WITH_PYDUB:
-
- asegment = AudioSegment(data, sample_width=swidth, frame_rate=srate, channels=ch)
- asegment.export(filename, format=filetype)
-
- else:
- raise AudioFileFormatError("cannot write file format {0} (file name: {1})".format(filetype, filename))
-
-
-def plot_all(signal, sampling_rate, energy_as_amp, detections=[], show=True, save_as=None):
-
- import matplotlib.pyplot as plt
- import numpy as np
- t = np.arange(0., np.ceil(float(len(signal))) / sampling_rate, 1./sampling_rate )
- if len(t) > len(signal):
- t = t[: len(signal) - len(t)]
-
- for start, end in detections:
- p = plt.axvspan(start, end, facecolor='g', ec = 'r', lw = 2, alpha=0.4)
-
- line = plt.axhline(y=energy_as_amp, lw=1, ls="--", c="r", label="Energy threshold as normalized amplitude")
- plt.plot(t, signal)
- legend = plt.legend(["Detection threshold"], bbox_to_anchor=(0., 1.02, 1., .102), loc=1, fontsize=16)
- ax = plt.gca().add_artist(legend)
-
- plt.xlabel("Time (s)", fontsize=24)
- plt.ylabel("Amplitude (normalized)", fontsize=24)
-
- if save_as is not None:
- plt.savefig(save_as, dpi=120)
-
- if show:
- plt.show()
-
-
-def seconds_to_str_fromatter(_format):
- """
- Accepted format directives: %i %s %m %h
- """
- # check directives are correct
-
- if _format == "%S":
- def _fromatter(seconds):
- return "{:.2f}".format(seconds)
-
- elif _format == "%I":
- def _fromatter(seconds):
- return "{0}".format(int(seconds * 1000))
-
- else:
- _format = _format.replace("%h", "{hrs:02d}")
- _format = _format.replace("%m", "{mins:02d}")
- _format = _format.replace("%s", "{secs:02d}")
- _format = _format.replace("%i", "{millis:03d}")
-
- try:
- i = _format.index("%")
- raise TimeFormatError("Unknow time format directive '{0}'".format(_format[i:i+2]))
- except ValueError:
- pass
-
- def _fromatter(seconds):
- millis = int(seconds * 1000)
- hrs, millis = divmod(millis, 3600000)
- mins, millis = divmod(millis, 60000)
- secs, millis = divmod(millis, 1000)
- return _format.format(hrs=hrs, mins=mins, secs=secs, millis=millis)
-
- return _fromatter
-
-
-
-class Worker(Thread):
-
- def __init__(self, timeout=0.2, debug=False, logger=None):
- self.timeout = timeout
- self.debug = debug
- self.logger = logger
-
- if self.debug and self.logger is None:
- self.logger = logging.getLogger(LOGGER_NAME)
- self.logger.setLevel(logging.DEBUG)
- handler = logging.StreamHandler(sys.stdout)
- self.logger.addHandler(handler)
-
- self._inbox = Queue()
- self._stop_request = Queue()
- Thread.__init__(self)
-
-
- def debug_message(self, message):
- self.logger.debug(message)
-
- def _stop_requested(self):
-
- try:
- message = self._stop_request.get_nowait()
- if message == "stop":
- return True
-
- except Empty:
- return False
-
- def stop(self):
- self._stop_request.put("stop")
- self.join()
-
- def send(self, message):
- self._inbox.put(message)
-
- def _get_message(self):
- try:
- message = self._inbox.get(timeout=self.timeout)
- return message
- except Empty:
- return None
-
-
-class TokenizerWorker(Worker):
-
- END_OF_PROCESSING = "END_OF_PROCESSING"
-
- def __init__(self, ads, tokenizer, analysis_window, observers):
- self.ads = ads
- self.tokenizer = tokenizer
- self.analysis_window = analysis_window
- self.observers = observers
- self._inbox = Queue()
- self.count = 0
- Worker.__init__(self)
-
- def run(self):
-
- def notify_observers(data, start, end):
- audio_data = b''.join(data)
- self.count += 1
-
- start_time = start * self.analysis_window
- end_time = (end+1) * self.analysis_window
- duration = (end - start + 1) * self.analysis_window
-
- # notify observers
- for observer in self.observers:
- observer.notify({"id" : self.count,
- "audio_data" : audio_data,
- "start" : start,
- "end" : end,
- "start_time" : start_time,
- "end_time" : end_time,
- "duration" : duration}
- )
-
- self.ads.open()
- self.tokenizer.tokenize(data_source=self, callback=notify_observers)
- for observer in self.observers:
- observer.notify(TokenizerWorker.END_OF_PROCESSING)
-
- def add_observer(self, observer):
- self.observers.append(observer)
-
- def remove_observer(self, observer):
- self.observers.remove(observer)
-
- def read(self):
- if self._stop_requested():
- return None
- else:
- return self.ads.read()
-
-
-class PlayerWorker(Worker):
-
- def __init__(self, player, timeout=0.2, debug=False, logger=None):
- self.player = player
- Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-
- def run(self):
- while True:
- if self._stop_requested():
- break
-
- message = self._get_message()
- if message is not None:
- if message == TokenizerWorker.END_OF_PROCESSING:
- break
-
- audio_data = message.pop("audio_data", None)
- start_time = message.pop("start_time", None)
- end_time = message.pop("end_time", None)
- dur = message.pop("duration", None)
- _id = message.pop("id", None)
-
- if audio_data is not None:
- if self.debug:
- self.debug_message("[PLAY]: Detection {id} played (start:{start}, end:{end}, dur:{dur})".format(id=_id,
- start="{:5.2f}".format(start_time), end="{:5.2f}".format(end_time), dur="{:5.2f}".format(dur)))
- self.player.play(audio_data)
-
- def notify(self, message):
- self.send(message)
-
-
-class CommandLineWorker(Worker):
-
- def __init__(self, command, timeout=0.2, debug=False, logger=None):
- self.command = command
- Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-
- def run(self):
- while True:
- if self._stop_requested():
- break
-
- message = self._get_message()
- if message is not None:
- if message == TokenizerWorker.END_OF_PROCESSING:
- break
-
- audio_data = message.pop("audio_data", None)
- _id = message.pop("id", None)
- if audio_data is not None:
- raw_audio_file = tempfile.NamedTemporaryFile(delete=False)
- raw_audio_file.write(audio_data)
- cmd = self.command.replace("$", raw_audio_file.name)
- if self.debug:
- self.debug_message("[CMD ]: Detection {id} command: {cmd}".format(id=_id, cmd=cmd))
- os.system(cmd)
- os.unlink(raw_audio_file.name)
-
- def notify(self, message):
- self.send(message)
-
-
-class TokenSaverWorker(Worker):
-
- def __init__(self, name_format, filetype, timeout=0.2, debug=False, logger=None, **kwargs):
- self.name_format = name_format
- self.filetype = filetype
- self.kwargs = kwargs
- Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-
- def run(self):
- while True:
- if self._stop_requested():
- break
-
- message = self._get_message()
- if message is not None:
- if message == TokenizerWorker.END_OF_PROCESSING:
- break
-
- audio_data = message.pop("audio_data", None)
- start_time = message.pop("start_time", None)
- end_time = message.pop("end_time", None)
- _id = message.pop("id", None)
- if audio_data is not None and len(audio_data) > 0:
- fname = self.name_format.format(N=_id, start = "{:.2f}".format(start_time), end = "{:.2f}".format(end_time))
- try:
- if self.debug:
- self.debug_message("[SAVE]: Detection {id} saved as {fname}".format(id=_id, fname=fname))
- save_audio_data(audio_data, fname, filetype=self.filetype, **self.kwargs)
- except Exception as e:
- sys.stderr.write(str(e) + "\n")
-
- def notify(self, message):
- self.send(message)
-
-
-class LogWorker(Worker):
-
- def __init__(self, print_detections=False, output_format="{start} {end}",
- time_formatter=seconds_to_str_fromatter("%S"), timeout=0.2, debug=False, logger=None):
-
- self.print_detections = print_detections
- self.output_format = output_format
- self.time_formatter = time_formatter
- self.detections = []
- Worker.__init__(self, timeout=timeout, debug=debug, logger=logger)
-
- def run(self):
- while True:
- if self._stop_requested():
- break
-
- message = self._get_message()
-
- if message is not None:
-
- if message == TokenizerWorker.END_OF_PROCESSING:
- break
-
- audio_data = message.pop("audio_data", None)
- _id = message.pop("id", None)
- start = message.pop("start", None)
- end = message.pop("end", None)
- start_time = message.pop("start_time", None)
- end_time = message.pop("end_time", None)
- if audio_data is not None and len(audio_data) > 0:
-
- if self.debug:
- self.debug_message("[DET ]: Detection {id} (start:{start}, end:{end})".format(id=_id,
- start="{:5.2f}".format(start_time),
- end="{:5.2f}".format(end_time)))
-
- if self.print_detections:
- print(self.output_format.format(id = _id,
- start = self.time_formatter(start_time),
- end = self.time_formatter(end_time)))
-
- self.detections.append((_id, start, end, start_time, end_time))
-
-
- def notify(self, message):
- self.send(message)
-
+__date__ = "2015-11-23"
+__updated__ = "2021-03-01"
def main(argv=None):
- '''Command line options.'''
-
program_name = os.path.basename(sys.argv[0])
- program_version = version
- program_build_date = "%s" % __updated__
-
- program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
- #program_usage = '''usage: spam two eggs''' # optional - will be autogenerated by optparse
- program_longdesc = '''''' # optional - give further explanation about what the program does
- program_license = "Copyright 2015 Mohamed El Amine SEHILI \
- Licensed under the General Public License (GPL) Version 3 \nhttp://www.gnu.org/licenses/"
-
if argv is None:
argv = sys.argv[1:]
try:
- # setup option parser
- parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
-
- group = OptionGroup(parser, "[Input-Output options]")
- group.add_option("-i", "--input", dest="input", help="Input audio or video file. Use - for stdin [default: read from microphone using pyaudio]", metavar="FILE")
- group.add_option("-t", "--input-type", dest="input_type", help="Input audio file type. Mandatory if file name has no extension [default: %default]", type=str, default=None, metavar="String")
- group.add_option("-M", "--max_time", dest="max_time", help="Max data (in seconds) to read from microphone/file [default: read until the end of file/stream]", type=float, default=None, metavar="FLOAT")
- group.add_option("-O", "--output-main", dest="output_main", help="Save main stream as. If omitted main stream will not be saved [default: omitted]", type=str, default=None, metavar="FILE")
- group.add_option("-o", "--output-tokens", dest="output_tokens", help="Output file name format for detections. Use {N} and {start} and {end} to build file names, example: 'Det_{N}_{start}-{end}.wav'", type=str, default=None, metavar="STRING")
- group.add_option("-T", "--output-type", dest="output_type", help="Audio type used to save detections and/or main stream. If not supplied will: (1). guess from extension or (2). use wav format", type=str, default=None, metavar="STRING")
- group.add_option("-u", "--use-channel", dest="use_channel", help="Choose channel to use from a multi-channel audio file (requires pydub). 'left', 'right' and 'mix' are accepted values. [Default: 1 (i.e. 1st or left channel)]", type=str, default="1", metavar="STRING")
- parser.add_option_group(group)
-
-
- group = OptionGroup(parser, "[Tokenization options]", "Set tokenizer options and energy threshold.")
- group.add_option("-a", "--analysis-window", dest="analysis_window", help="Size of analysis window in seconds [default: %default (10ms)]", type=float, default=0.01, metavar="FLOAT")
- group.add_option("-n", "--min-duration", dest="min_duration", help="Min duration of a valid audio event in seconds [default: %default]", type=float, default=0.2, metavar="FLOAT")
- group.add_option("-m", "--max-duration", dest="max_duration", help="Max duration of a valid audio event in seconds [default: %default]", type=float, default=5, metavar="FLOAT")
- group.add_option("-s", "--max-silence", dest="max_silence", help="Max duration of a consecutive silence within a valid audio event in seconds [default: %default]", type=float, default=0.3, metavar="FLOAT")
- group.add_option("-d", "--drop-trailing-silence", dest="drop_trailing_silence", help="Drop trailing silence from a detection [default: keep trailing silence]", action="store_true", default=False)
- group.add_option("-e", "--energy-threshold", dest="energy_threshold", help="Log energy threshold for detection [default: %default]", type=float, default=50, metavar="FLOAT")
- parser.add_option_group(group)
-
-
- group = OptionGroup(parser, "[Audio parameters]", "Define audio parameters if data is read from a headerless file (raw or stdin) or you want to use different microphone parameters.")
- group.add_option("-r", "--rate", dest="sampling_rate", help="Sampling rate of audio data [default: %default]", type=int, default=16000, metavar="INT")
- group.add_option("-c", "--channels", dest="channels", help="Number of channels of audio data [default: %default]", type=int, default=1, metavar="INT")
- group.add_option("-w", "--width", dest="sample_width", help="Number of bytes per audio sample [default: %default]", type=int, default=2, metavar="INT")
- parser.add_option_group(group)
-
- group = OptionGroup(parser, "[Do something with detections]", "Use these options to print, play or plot detections.")
- group.add_option("-C", "--command", dest="command", help="Command to call when an audio detection occurs. Use $ to represent the file name to use with the command (e.g. -C 'du -h $')", default=None, type=str, metavar="STRING")
- group.add_option("-E", "--echo", dest="echo", help="Play back each detection immediately using pyaudio [default: do not play]", action="store_true", default=False)
- group.add_option("-p", "--plot", dest="plot", help="Plot and show audio signal and detections (requires matplotlib)", action="store_true", default=False)
- group.add_option("", "--save-image", dest="save_image", help="Save plotted audio signal and detections as a picture or a PDF file (requires matplotlib)", type=str, default=None, metavar="FILE")
- group.add_option("", "--printf", dest="printf", help="print detections one per line using a user supplied format (e.g. '[{id}]: {start} -- {end}'). Available keywords {id}, {start} and {end}", type=str, default="{id} {start} {end}", metavar="STRING")
- group.add_option("", "--time-format", dest="time_format", help="format used to print {start} and {end}. [Default= %default]. %S: absolute time in sec. %I: absolute time in ms. If at least one of (%h, %m, %s, %i) is used, convert time into hours, minutes, seconds and millis (e.g. %h:%m:%s.%i). Only required fields are printed", type=str, default="%S", metavar="STRING")
- parser.add_option_group(group)
-
- parser.add_option("-q", "--quiet", dest="quiet", help="Do not print any information about detections [default: print 'id', 'start' and 'end' of each detection]", action="store_true", default=False)
- parser.add_option("-D", "--debug", dest="debug", help="Print processing operations to STDOUT", action="store_true", default=False)
- parser.add_option("", "--debug-file", dest="debug_file", help="Print processing operations to FILE", type=str, default=None, metavar="FILE")
-
-
+ parser = ArgumentParser(
+ prog=program_name, description="An Audio Tokenization tool"
+ )
+ parser.add_argument(
+ "--version", "-v", action="version", version=__version__
+ )
+ group = parser.add_argument_group("Input-Output options")
+ group.add_argument(
+ dest="input",
+ help="Input audio or video file. Use '-' for stdin "
+ "[default: read from microphone using pyaudio]",
+ metavar="input",
+ nargs="?",
+ default=None,
+ )
+ group.add_argument(
+ "-I",
+ "--input-device-index",
+ dest="input_device_index",
+ help="Audio device index [default: %(default)s]. "
+ "Optional and only effective when using PyAudio",
+ type=int,
+ default=None,
+ metavar="INT",
+ )
+ group.add_argument(
+ "-F",
+ "--audio-frame-per-buffer",
+ dest="frame_per_buffer",
+ help="Audio frame per buffer [default: %(default)s]. "
+ "Optional and only effective when using PyAudio",
+ type=int,
+ default=1024,
+ metavar="INT",
+ )
+ group.add_argument(
+ "-f",
+ "--input-format",
+ dest="input_format",
+ type=str,
+ default=None,
+ help="Input audio file format. If not given, guess format from "
+ "extension. If output file name has no extension, guess format "
+ "from file header (requires pydub). If none of the previous is "
+ "true, raise an error",
+ metavar="STRING",
+ )
+ group.add_argument(
+ "-M",
+ "--max-read",
+ dest="max_read",
+ type=float,
+ default=None,
+ help="Maximum data (in seconds) to read from microphone or file "
+ "[default: read until the end of file/stream]",
+ metavar="FLOAT",
+ )
+ group.add_argument(
+ "-L",
+ "--large-file",
+ dest="large_file",
+ action="store_true",
+ default=False,
+ help="Whether input file should be treated as a large file. "
+ "If True, data will be read from file on demand, otherwise all "
+ "audio data is loaded to memory before tokenization.",
+ )
+ group.add_argument(
+ "-O",
+ "--save-stream",
+ dest="save_stream",
+ type=str,
+ default=None,
+ help="Save acquired audio data (from file or microphone) to disk."
+ " If omitted no data will be saved. [default: omitted]",
+ metavar="FILE",
+ )
+ group.add_argument(
+ "-o",
+ "--save-detections-as",
+ dest="save_detections_as",
+ type=str,
+ default=None,
+ help="File name format for detections."
+ "The following placeholders can be used to build output file name "
+ "for each detection: {id} (sequential, starts from 1), {start}, "
+ "{end} and {duration}. Time placeholders are in seconds. "
+ "Example: 'Event_{id}_{start}-{end}_{duration:.3f}.wav'",
+ metavar="STRING",
+ )
+ group.add_argument(
+ "-T",
+ "--output-format",
+ dest="output_format",
+ type=str,
+ default=None,
+ help="Audio format used to save detections and/or main stream. "
+ "If not supplied, then it will: (1. be guessed from extension or "
+ "(2. use raw format",
+ metavar="STRING",
+ )
+ group.add_argument(
+ "-u",
+ "--use-channel",
+ dest="use_channel",
+ type=str,
+ default=None,
+ help="Which channel to use for tokenization when input stream is "
+ "multi-channel (0 is the first channel). Default is None, meaning "
+ "that all channels will be considered for tokenization (i.e., get "
+ "any valid audio event regardless of the channel it occurs in). "
+ "This value can also be 'mix' (alias 'avg' or 'average') and "
+ "means mix down all audio channels into one channel (i.e. compute "
+ "average channel) and use the resulting channel for tokenization. "
+ "Whatever option is used, saved audio events will contain the same"
+ " number of channels as input stream. "
+ "[Default: None, use all channels]",
+ metavar="INT/STRING",
+ )
+
+ group = parser.add_argument_group(
+ "Tokenization options", "Set tokenizer options."
+ )
+ group.add_argument(
+ "-a",
+ "--analysis-window",
+ dest="analysis_window",
+ default=0.01,
+ type=float,
+ help="Size of analysis window in seconds [default: %(default)s "
+ "(10ms)]",
+ metavar="FLOAT",
+ )
+ group.add_argument(
+ "-n",
+ "--min-duration",
+ dest="min_duration",
+ type=float,
+ default=0.2,
+ help="Min duration of a valid audio event in seconds "
+ "[default: %(default)s]",
+ metavar="FLOAT",
+ )
+ group.add_argument(
+ "-m",
+ "--max-duration",
+ dest="max_duration",
+ type=float,
+ default=5,
+ help="Max duration of a valid audio event in seconds "
+ "[default: %(default)s]",
+ metavar="FLOAT",
+ )
+ group.add_argument(
+ "-s",
+ "--max-silence",
+ dest="max_silence",
+ type=float,
+ default=0.3,
+ help="Max duration of a consecutive silence within a valid audio "
+ "event in seconds [default: %(default)s]",
+ metavar="FLOAT",
+ )
+ group.add_argument(
+ "-d",
+ "--drop-trailing-silence",
+ dest="drop_trailing_silence",
+ action="store_true",
+ default=False,
+ help="Drop trailing silence from a detection [default: keep "
+ "trailing silence]",
+ )
+ group.add_argument(
+ "-R",
+ "--strict-min-duration",
+ dest="strict_min_duration",
+ action="store_true",
+ default=False,
+ help="Reject an event shorter than --min-duration even if it's "
+ "adjacent to the latest valid event that reached max-duration "
+ "[default: keep such events]",
+ )
+ group.add_argument(
+ "-e",
+ "--energy-threshold",
+ dest="energy_threshold",
+ type=float,
+ default=50,
+ help="Log energy threshold for detection [default: %(default)s]",
+ metavar="FLOAT",
+ )
+
+ group = parser.add_argument_group(
+ "Audio parameters",
+ "Define audio parameters if data is read from a "
+ "headerless file (raw or stdin) or you want to use "
+ "different microphone parameters.",
+ )
+ group.add_argument(
+ "-r",
+ "--rate",
+ dest="sampling_rate",
+ type=int,
+ default=16000,
+ help="Sampling rate of audio data [default: %(default)s]",
+ metavar="INT",
+ )
+ group.add_argument(
+ "-c",
+ "--channels",
+ dest="channels",
+ type=int,
+ default=1,
+ help="Number of channels of audio data [default: %(default)s]",
+ metavar="INT",
+ )
+ group.add_argument(
+ "-w",
+ "--width",
+ dest="sample_width",
+ type=int,
+ default=2,
+ help="Number of bytes per audio sample [default: %(default)s]",
+ metavar="INT",
+ )
+
+ group = parser.add_argument_group(
+ "Do something with audio events",
+ "Use these options to print, play back or plot detections.",
+ )
+ group.add_argument(
+ "-C",
+ "--command",
+ dest="command",
+ type=str,
+ help="Command to call when an audio detection occurs. Use '{file}' "
+ "as a placeholder for the temporary wav file that will contain "
+ "event's data (e.g., \"-C 'du -h {file}'\" to print out file size "
+ " or \"-C 'play -q {file}'\" to play audio with sox)",
+ metavar="STRING",
+ )
+ group.add_argument(
+ "-E",
+ "--echo",
+ dest="echo",
+ action="store_true",
+ default=False,
+ help="Play back each detection immediately using pyaudio",
+ )
+ group.add_argument(
+ "-B",
+ "--progress-bar",
+ dest="progress_bar",
+ action="store_true",
+ default=False,
+ help="Show a progress bar when playing audio",
+ )
+ group.add_argument(
+ "-p",
+ "--plot",
+ dest="plot",
+ action="store_true",
+ default=False,
+ help="Plot and show audio signal and detections (requires "
+ "matplotlib)",
+ )
+ group.add_argument(
+ "--save-image",
+ dest="save_image",
+ type=str,
+ help="Save plotted audio signal and detections as a picture or a "
+ "PDF file (requires matplotlib)",
+ metavar="FILE",
+ )
+ group.add_argument(
+ "--printf",
+ dest="printf",
+ type=str,
+ default="{id} {start} {end}",
+ help="Print audio events information, one per line, using this "
+ "format. Format can contain text with the following placeholders: "
+ "{id} (sequential, starts from 1), {start}, {end}, {duration} and "
+ "{timestamp}. The first 3 time placeholders are in seconds and "
+ "their format can be set using --time-format argument. "
+ "{timestamp} is the system timestamp (date and time) of the event "
+ "and can be set using --timestamp-format argument.\n"
+ "Example: '[{id}]: {start} -> {end} -- {timestamp}'",
+ metavar="STRING",
+ )
+ group.add_argument(
+ "--time-format",
+ dest="time_format",
+ type=str,
+ default="%S",
+ help="Format used to print {start}, {end} and {duration} "
+ "placeholders used with --printf [default= %(default)s]. The "
+ "following formats are accepted:\n"
+ "%%S: absolute time in seconds. %%I: absolute time in ms. If at "
+ "least one of (%%h, %%m, %%s, %%i) is used, convert time into "
+ "hours, minutes, seconds and millis (e.g. %%h:%%m:%%s.%%i). Only "
+ "supplied fields are printed. Note that %%S and %%I can only be "
+ "used alone",
+ metavar="STRING",
+ )
+ group.add_argument(
+ "--timestamp-format",
+ dest="timestamp_format",
+ type=str,
+ default="%Y/%m/%d %H:%M:%S",
+ help="Format used to print {timestamp}. Should be a format "
+ "accepted by 'datetime' standard module. Default: "
+ "'%%Y/%%m/%%d %%H:%%M:%%S'",
+ )
+ parser.add_argument(
+ "-q",
+ "--quiet",
+ dest="quiet",
+ action="store_true",
+ default=False,
+ help="Do not print any information about detections [default: "
+ "print 'id', 'start' and 'end' of each detection]",
+ )
+ parser.add_argument(
+ "-D",
+ "--debug",
+ dest="debug",
+ action="store_true",
+ default=False,
+ help="Print processing operations to STDOUT",
+ )
+ parser.add_argument(
+ "--debug-file",
+ dest="debug_file",
+ type=str,
+ default=None,
+ help="Print processing operations to FILE",
+ metavar="FILE",
+ )
+
+ args = parser.parse_args(argv)
+ logger = make_logger(args.debug, args.debug_file)
+ kwargs = make_kwargs(args)
+ reader, observers = initialize_workers(
+ logger=logger, **kwargs.io, **kwargs.miscellaneous
+ )
+ tokenizer_worker = workers.TokenizerWorker(
+ reader, observers, logger=logger, **kwargs.split
+ )
+ tokenizer_worker.start_all()
- # process options
- (opts, args) = parser.parse_args(argv)
-
- if opts.input == "-":
- asource = StdinAudioSource(sampling_rate = opts.sampling_rate,
- sample_width = opts.sample_width,
- channels = opts.channels)
- #read data from a file
- elif opts.input is not None:
- asource = file_to_audio_source(filename=opts.input, filetype=opts.input_type, uc=opts.use_channel)
-
- # read data from microphone via pyaudio
- else:
- try:
- asource = PyAudioSource(sampling_rate = opts.sampling_rate,
- sample_width = opts.sample_width,
- channels = opts.channels)
- except Exception:
- sys.stderr.write("Cannot read data from audio device!\n")
- sys.stderr.write("You should either install pyaudio or read data from STDIN\n")
- sys.exit(2)
-
- logger = logging.getLogger(LOGGER_NAME)
- logger.setLevel(logging.DEBUG)
-
- handler = logging.StreamHandler(sys.stdout)
- if opts.quiet or not opts.debug:
- # only critical messages will be printed
- handler.setLevel(logging.CRITICAL)
- else:
- handler.setLevel(logging.DEBUG)
-
- logger.addHandler(handler)
-
- if opts.debug_file is not None:
- logger.setLevel(logging.DEBUG)
- opts.debug = True
- handler = logging.FileHandler(opts.debug_file, "w")
- fmt = logging.Formatter('[%(asctime)s] | %(message)s')
- handler.setFormatter(fmt)
- handler.setLevel(logging.DEBUG)
- logger.addHandler(handler)
-
- record = opts.output_main is not None or opts.plot or opts.save_image is not None
-
- ads = ADSFactory.ads(audio_source = asource, block_dur = opts.analysis_window, max_time = opts.max_time, record = record)
- validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=opts.energy_threshold)
-
-
- if opts.drop_trailing_silence:
- mode = StreamTokenizer.DROP_TRAILING_SILENCE
- else:
- mode = 0
-
- analysis_window_per_second = 1. / opts.analysis_window
- tokenizer = StreamTokenizer(validator=validator, min_length=opts.min_duration * analysis_window_per_second,
- max_length=int(opts.max_duration * analysis_window_per_second),
- max_continuous_silence=opts.max_silence * analysis_window_per_second,
- mode = mode)
-
-
- observers = []
- tokenizer_worker = None
-
- if opts.output_tokens is not None:
-
- try:
- # check user format is correct
- fname = opts.output_tokens.format(N=0, start=0, end=0)
-
- # find file type for detections
- tok_type = opts.output_type
- if tok_type is None:
- tok_type = os.path.splitext(opts.output_tokens)[1][1:]
- if tok_type == "":
- tok_type = "wav"
-
- token_saver = TokenSaverWorker(name_format=opts.output_tokens, filetype=tok_type,
- debug=opts.debug, logger=logger, sr=asource.get_sampling_rate(),
- sw=asource.get_sample_width(),
- ch=asource.get_channels())
- observers.append(token_saver)
-
- except Exception:
- sys.stderr.write("Wrong format for detections file name: '{0}'\n".format(opts.output_tokens))
- sys.exit(2)
-
- if opts.echo:
- try:
- player = player_for(asource)
- player_worker = PlayerWorker(player=player, debug=opts.debug, logger=logger)
- observers.append(player_worker)
- except Exception:
- sys.stderr.write("Cannot get an audio player!\n")
- sys.stderr.write("You should either install pyaudio or supply a command (-C option) to play audio\n")
- sys.exit(2)
-
- if opts.command is not None and len(opts.command) > 0:
- cmd_worker = CommandLineWorker(command=opts.command, debug=opts.debug, logger=logger)
- observers.append(cmd_worker)
-
- if not opts.quiet or opts.plot is not None or opts.save_image is not None:
- oformat = opts.printf.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r")
- converter = seconds_to_str_fromatter(opts.time_format)
- log_worker = LogWorker(print_detections = not opts.quiet, output_format=oformat,
- time_formatter=converter, logger=logger, debug=opts.debug)
- observers.append(log_worker)
-
- tokenizer_worker = TokenizerWorker(ads, tokenizer, opts.analysis_window, observers)
-
- def _save_main_stream():
- # find file type
- main_type = opts.output_type
- if main_type is None:
- main_type = os.path.splitext(opts.output_main)[1][1:]
- if main_type == "":
- main_type = "wav"
- ads.close()
- ads.rewind()
- data = ads.get_audio_source().get_data_buffer()
- if len(data) > 0:
- save_audio_data(data=data, filename=opts.output_main, filetype=main_type, sr=asource.get_sampling_rate(),
- sw = asource.get_sample_width(),
- ch = asource.get_channels())
-
- def _plot():
- import numpy as np
- ads.close()
- ads.rewind()
- data = ads.get_audio_source().get_data_buffer()
- signal = AudioEnergyValidator._convert(data, asource.get_sample_width())
- detections = [(det[3] , det[4]) for det in log_worker.detections]
- max_amplitude = 2**(asource.get_sample_width() * 8 - 1) - 1
- energy_as_amp = np.sqrt(np.exp(opts.energy_threshold * np.log(10) / 10)) / max_amplitude
- plot_all(signal / max_amplitude, asource.get_sampling_rate(), energy_as_amp, detections, show = opts.plot, save_as = opts.save_image)
-
-
- # start observer threads
- for obs in observers:
- obs.start()
- # start tokenization thread
- tokenizer_worker.start()
-
while True:
time.sleep(1)
if len(threading.enumerate()) == 1:
- break
-
- tokenizer_worker = None
-
- if opts.output_main is not None:
- _save_main_stream()
- if opts.plot or opts.save_image is not None:
- _plot()
-
- return 0
-
- except KeyboardInterrupt:
-
+ raise EndOfProcessing
+
+ except (KeyboardInterrupt, EndOfProcessing):
if tokenizer_worker is not None:
- tokenizer_worker.stop()
- for obs in observers:
- obs.stop()
-
- if opts.output_main is not None:
- _save_main_stream()
- if opts.plot or opts.save_image is not None:
- _plot()
-
+ tokenizer_worker.stop_all()
+
+ if isinstance(reader, workers.StreamSaverWorker):
+ reader.join()
+ try:
+ reader.save_stream()
+ except AudioEncodingWarning as ae_warn:
+ print(str(ae_warn), file=sys.stderr)
+
+ if args.plot or args.save_image is not None:
+ from .plotting import plot
+
+ reader.rewind()
+ record = AudioRegion(
+ reader.data, reader.sr, reader.sw, reader.ch
+ )
+ detections = (
+ (det.start, det.end) for det in tokenizer_worker.detections
+ )
+ plot(
+ record,
+ detections=detections,
+ energy_threshold=args.energy_threshold,
+ show=True,
+ save_as=args.save_image,
+ )
return 0
- except Exception as e:
- sys.stderr.write(program_name + ": " + str(e) + "\n")
- sys.stderr.write("for help use -h\n")
-
- return 2
if __name__ == "__main__":
- if DEBUG:
- sys.argv.append("-h")
- if TESTRUN:
- import doctest
- doctest.testmod()
- if PROFILE:
- import cProfile
- import pstats
- profile_filename = 'auditok.auditok_profile.txt'
- cProfile.run('main()', profile_filename)
- statsfile = open("profile_stats.txt", "wb")
- p = pstats.Stats(profile_filename, stream=statsfile)
- stats = p.strip_dirs().sort_stats('cumulative')
- stats.print_stats()
- statsfile.close()
- sys.exit(0)
- sys.exit(main())
+ sys.exit(main(None))
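Since main() takes an argv list and returns an exit code, the rewritten CLI can also be exercised programmatically. A hedged sketch based on the options defined above (the file name and format strings are illustrative, and the usual `auditok` console entry point is assumed):

    from auditok.cmdline import main

    # equivalent to the shell command:
    #   auditok audio.wav -o "event_{id}_{start}-{end}.wav" --printf "{id} {start} {end}"
    exit_code = main([
        "audio.wav",
        "-o", "event_{id}_{start}-{end}.wav",
        "--printf", "{id} {start} {end}",
    ])
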
diff --git a/libs/auditok/cmdline_util.py b/libs/auditok/cmdline_util.py
new file mode 100755
index 000000000..bde72aa36
--- /dev/null
+++ b/libs/auditok/cmdline_util.py
@@ -0,0 +1,126 @@
+import sys
+import logging
+from collections import namedtuple
+from . import workers
+from .util import AudioDataSource
+from .io import player_for
+
+_AUDITOK_LOGGER = "AUDITOK_LOGGER"
+KeywordArguments = namedtuple(
+ "KeywordArguments", ["io", "split", "miscellaneous"]
+)
+
+
+def make_kwargs(args_ns):
+ if args_ns.save_stream is None:
+ record = args_ns.plot or (args_ns.save_image is not None)
+ else:
+ record = False
+ try:
+ use_channel = int(args_ns.use_channel)
+ except (ValueError, TypeError):
+ use_channel = args_ns.use_channel
+
+ io_kwargs = {
+ "input": args_ns.input,
+ "audio_format": args_ns.input_format,
+ "max_read": args_ns.max_read,
+ "block_dur": args_ns.analysis_window,
+ "sampling_rate": args_ns.sampling_rate,
+ "sample_width": args_ns.sample_width,
+ "channels": args_ns.channels,
+ "use_channel": use_channel,
+ "save_stream": args_ns.save_stream,
+ "save_detections_as": args_ns.save_detections_as,
+ "export_format": args_ns.output_format,
+ "large_file": args_ns.large_file,
+ "frames_per_buffer": args_ns.frame_per_buffer,
+ "input_device_index": args_ns.input_device_index,
+ "record": record,
+ }
+
+ split_kwargs = {
+ "min_dur": args_ns.min_duration,
+ "max_dur": args_ns.max_duration,
+ "max_silence": args_ns.max_silence,
+ "drop_trailing_silence": args_ns.drop_trailing_silence,
+ "strict_min_dur": args_ns.strict_min_duration,
+ "energy_threshold": args_ns.energy_threshold,
+ }
+
+ miscellaneous = {
+ "echo": args_ns.echo,
+ "progress_bar": args_ns.progress_bar,
+ "command": args_ns.command,
+ "quiet": args_ns.quiet,
+ "printf": args_ns.printf,
+ "time_format": args_ns.time_format,
+ "timestamp_format": args_ns.timestamp_format,
+ }
+ return KeywordArguments(io_kwargs, split_kwargs, miscellaneous)
+
+
+def make_logger(stderr=False, file=None, name=_AUDITOK_LOGGER):
+ if not stderr and file is None:
+ return None
+ logger = logging.getLogger(name)
+ logger.setLevel(logging.INFO)
+ if stderr:
+ handler = logging.StreamHandler(sys.stderr)
+ handler.setLevel(logging.INFO)
+ logger.addHandler(handler)
+
+ if file is not None:
+ handler = logging.FileHandler(file, "w")
+ fmt = logging.Formatter("[%(asctime)s] | %(message)s")
+ handler.setFormatter(fmt)
+ handler.setLevel(logging.INFO)
+ logger.addHandler(handler)
+ return logger
+
+
+def initialize_workers(logger=None, **kwargs):
+ observers = []
+ reader = AudioDataSource(source=kwargs["input"], **kwargs)
+ if kwargs["save_stream"] is not None:
+ reader = workers.StreamSaverWorker(
+ reader,
+ filename=kwargs["save_stream"],
+ export_format=kwargs["export_format"],
+ )
+ reader.start()
+
+ if kwargs["save_detections_as"] is not None:
+ worker = workers.RegionSaverWorker(
+ kwargs["save_detections_as"],
+ kwargs["export_format"],
+ logger=logger,
+ )
+ observers.append(worker)
+
+ if kwargs["echo"]:
+ player = player_for(reader)
+ worker = workers.PlayerWorker(
+ player, progress_bar=kwargs["progress_bar"], logger=logger
+ )
+ observers.append(worker)
+
+ if kwargs["command"] is not None:
+ worker = workers.CommandLineWorker(
+ command=kwargs["command"], logger=logger
+ )
+ observers.append(worker)
+
+ if not kwargs["quiet"]:
+ print_format = (
+ kwargs["printf"]
+ .replace("\\n", "\n")
+ .replace("\\t", "\t")
+ .replace("\\r", "\r")
+ )
+ worker = workers.PrintWorker(
+ print_format, kwargs["time_format"], kwargs["timestamp_format"]
+ )
+ observers.append(worker)
+
+ return reader, observers
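To illustrate the helpers above: make_logger() returns None when neither stderr nor a file is requested, and otherwise an INFO-level logger that initialize_workers() then passes to the observer workers. A small sketch of the logger part (the log file path is illustrative):

    from auditok.cmdline_util import make_logger

    # log processing messages to stderr and to a file
    logger = make_logger(stderr=True, file="auditok.log")
    logger.info("workers initialized")
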
diff --git a/libs/auditok/core.py b/libs/auditok/core.py
index 47441d2b7..af00dc7af 100644
--- a/libs/auditok/core.py
+++ b/libs/auditok/core.py
@@ -1,264 +1,1267 @@
"""
-This module gathers processing (i.e. tokenization) classes.
-
-Class summary
-=============
-
.. autosummary::
+ :toctree: generated/
- StreamTokenizer
+ load
+ split
+ AudioRegion
+ StreamTokenizer
"""
+import os
+import math
+from .util import AudioReader, DataValidator, AudioEnergyValidator
+from .io import check_audio_data, to_file, player_for, get_audio_source
+from .exceptions import TooSamllBlockDuration
+
+try:
+ from . import signal_numpy as signal
+except ImportError:
+ from . import signal
+
+__all__ = ["load", "split", "AudioRegion", "StreamTokenizer"]
+
+
+DEFAULT_ANALYSIS_WINDOW = 0.05
+DEFAULT_ENERGY_THRESHOLD = 50
+_EPSILON = 1e-10
+
+
+def load(input, skip=0, max_read=None, **kwargs):
+ """Load audio data from a source and return it as an :class:`AudioRegion`.
+
+ Parameters
+ ----------
+ input : None, str, bytes, AudioSource
+ source to read audio data from. If `str`, it should be a path to a
+ valid audio file. If `bytes`, it is used as raw audio data. If it is
+ "-", raw data will be read from stdin. If None, read audio data from
+ the microphone using PyAudio. If of type `bytes` or is a path to a
+ raw audio file then `sampling_rate`, `sample_width` and `channels`
+ parameters (or their aliases) are required. If it's an
+ :class:`AudioSource` object it's used directly to read data.
+ skip : float, default: 0
+ amount, in seconds, of audio data to skip from source. If read from
+ a microphone, `skip` must be 0, otherwise a `ValueError` is raised.
+ max_read : float, default: None
+ amount, in seconds, of audio data to read from source. If read from
+ microphone, `max_read` should not be None, otherwise a `ValueError` is
+ raised.
+ audio_format, fmt : str
+ type of audio data (e.g., wav, ogg, flac, raw, etc.). This will only
+ be used if `input` is a string path to an audio file. If not given,
+ audio type will be guessed from file name extension or from file
+ header.
+ sampling_rate, sr : int
+ sampling rate of audio data. Required if `input` is a raw audio file,
+ a `bytes` object or None (i.e., read from microphone).
+ sample_width, sw : int
+ number of bytes used to encode one audio sample, typically 1, 2 or 4.
+ Required for raw data, see `sampling_rate`.
+ channels, ch : int
+ number of channels of audio data. Required for raw data, see
+ `sampling_rate`.
+ large_file : bool, default: False
+ If True, AND if `input` is a path to a *wav* of a *raw* audio file
+ (and **only** these two formats) then audio file is not fully loaded to
+ memory in order to create the region (but the portion of data needed to
+ create the region is of course loaded to memory). Set to True if
+ `max_read` is significantly smaller then the size of a large audio file
+ that shouldn't be entirely loaded to memory.
+
+ Returns
+ -------
+ region: AudioRegion
+
+ Raises
+ ------
+ ValueError
+ raised if `input` is None (i.e., read data from microphone) and `skip`
+ != 0, or if `input` is None and `max_read` is None (meaning that when
+ reading from the microphone, no data should be skipped, and the maximum
+ amount of data to read should be explicitly provided).
+ """
+ return AudioRegion.load(input, skip, max_read, **kwargs)
+
+
+def split(
+ input,
+ min_dur=0.2,
+ max_dur=5,
+ max_silence=0.3,
+ drop_trailing_silence=False,
+ strict_min_dur=False,
+ **kwargs
+):
+ """
+ Split audio data and return a generator of AudioRegions
+
+ Parameters
+ ----------
+ input : str, bytes, AudioSource, AudioReader, AudioRegion or None
+ input audio data. If str, it should be a path to an existing audio file.
+ "-" is interpreted as standard input. If bytes, input is considered as
+ raw audio data. If None, read audio from microphone.
+ Every object that is not an `AudioReader` will be transformed into an
+ `AudioReader` before processing. If it is a `str` that refers to a raw
+ audio file, `bytes` or None, audio parameters should be provided using
+ kwargs (i.e., `sampling_rate`, `sample_width` and `channels` or their
+ aliases).
+ If `input` is str then audio format will be guessed from file extension.
+ `audio_format` (alias `fmt`) kwarg can also be given to specify audio
+ format explicitly. If none of these options is available, rely on
+ backend (currently only pydub is supported) to load data.
+ min_dur : float, default: 0.2
+ minimum duration in seconds of a detected audio event. Using large
+ values for `min_dur` means very short audio events (e.g., very short
+ 1-word utterances like 'yes' or 'no') can be missed. Using very short
+ values might result in a high number of short, barely useful audio
+ events.
+ max_dur : float, default: 5
+ maximum duration in seconds of a detected audio event. If an audio event
+ lasts more than `max_dur` it will be truncated. If the continuation of a
+ truncated audio event is shorter than `min_dur` then this continuation
+ is accepted as a valid audio event if `strict_min_dur` is False.
+ Otherwise it is rejected.
+ max_silence : float, default: 0.3
+ maximum duration of continuous silence within an audio event. There
+ might be many silent gaps of this duration within one audio event. If
+ the continuous silence happens at the end of the event, then it is kept
+ as part of the event if `drop_trailing_silence` is False (default).
+ drop_trailing_silence : bool, default: False
+ Whether to remove trailing silence from detected events. To avoid abrupt
+ cuts in speech, trailing silence should be kept, therefore this
+ parameter should be False.
+ strict_min_dur : bool, default: False
+ strict minimum duration. Do not accept an audio event if it is shorter
+ than `min_dur` even if it is contiguous to the latest valid event. This
+ happens if the latest detected event had reached `max_dur`.
+
+ Other Parameters
+ ----------------
+ analysis_window, aw : float, default: 0.05 (50 ms)
+ duration of analysis window in seconds. A value between 0.01 (10 ms) and
+ 0.1 (100 ms) should be good for most use-cases.
+ audio_format, fmt : str
+ type of audio data (e.g., wav, ogg, flac, raw, etc.). This will only be
+ used if `input` is a string path to an audio file. If not given, audio
+ type will be guessed from file name extension or from file header.
+ sampling_rate, sr : int
+ sampling rate of audio data. Required if `input` is a raw audio file,
+ a bytes object or None (i.e., read from microphone).
+ sample_width, sw : int
+ number of bytes used to encode one audio sample, typically 1, 2 or 4.
+ Required for raw data, see `sampling_rate`.
+ channels, ch : int
+ number of channels of audio data. Required for raw data, see
+ `sampling_rate`.
+ use_channel, uc : {None, "mix"} or int
+ which channel to use for split if `input` has multiple audio channels.
+ Regardless of which channel is used for splitting, returned audio events
+ contain data from *all* channels, just as `input`.
+ The following values are accepted:
+
+ - None (alias "any"): accept audio activity from any channel, even if
+ other channels are silent. This is the default behavior.
+
+ - "mix" ("avg" or "average"): mix down all channels (i.e. compute
+ average channel) and split the resulting channel.
+
+ - int (0 <= value < `channels`): use the channel specified by this
+ integer id for split.
+
+ large_file : bool, default: False
+ If True, AND if `input` is a path to a *wav* or a *raw* audio file
+ (and only these two formats), then audio data is lazily loaded to memory
+ (i.e., one analysis window at a time). Otherwise the whole file is loaded
+ to memory before split. Set to True if the size of the file is larger
+ than available memory.
+ max_read, mr : float, default: None, read until end of stream
+ maximum data to read from source in seconds.
+ validator, val : callable, DataValidator
+ custom data validator. If `None` (default), an `AudioEnergyValidator` is
+ used with the given energy threshold. Can be a callable or an instance
+ of `DataValidator` that implements `is_valid`. In either case, it'll be
+ called with a window of audio data as the first parameter.
+ energy_threshold, eth : float, default: 50
+ energy threshold for audio activity detection. Audio regions that have
+ enough windows with a signal energy equal to or above this threshold
+ are considered valid audio events. Here we refer to this quantity as
+ the energy of the signal but, to be more accurate, it is the log
+ energy, computed as: `20 * log10(sqrt(dot(x, x) / len(x)))` (see
+ :class:`AudioEnergyValidator` and
+ :func:`calculate_energy_single_channel`). If `validator` is given, this
+ argument is ignored.
+
+ Yields
+ ------
+ AudioRegion
+ a generator of detected :class:`AudioRegion` s.
+ """
+ if min_dur <= 0:
+ raise ValueError("'min_dur' ({}) must be > 0".format(min_dur))
+ if max_dur <= 0:
+ raise ValueError("'max_dur' ({}) must be > 0".format(max_dur))
+ if max_silence < 0:
+ raise ValueError("'max_silence' ({}) must be >= 0".format(max_silence))
+
+ if isinstance(input, AudioReader):
+ source = input
+ analysis_window = source.block_dur
+ else:
+ analysis_window = kwargs.get(
+ "analysis_window", kwargs.get("aw", DEFAULT_ANALYSIS_WINDOW)
+ )
+ if analysis_window <= 0:
+ raise ValueError(
+ "'analysis_window' ({}) must be > 0".format(analysis_window)
+ )
+
+ params = kwargs.copy()
+ params["max_read"] = params.get("max_read", params.get("mr"))
+ params["audio_format"] = params.get("audio_format", params.get("fmt"))
+ if isinstance(input, AudioRegion):
+ params["sampling_rate"] = input.sr
+ params["sample_width"] = input.sw
+ params["channels"] = input.ch
+ input = bytes(input)
+ try:
+ source = AudioReader(input, block_dur=analysis_window, **params)
+ except TooSamllBlockDuration as exc:
+ err_msg = "Too small 'analysis_windows' ({0}) for sampling rate "
+ err_msg += "({1}). Analysis windows should at least be 1/{1} to "
+ err_msg += "cover one single data sample"
+ raise ValueError(err_msg.format(exc.block_dur, exc.sampling_rate))
+
+ validator = kwargs.get("validator", kwargs.get("val"))
+ if validator is None:
+ energy_threshold = kwargs.get(
+ "energy_threshold", kwargs.get("eth", DEFAULT_ENERGY_THRESHOLD)
+ )
+ use_channel = kwargs.get("use_channel", kwargs.get("uc"))
+ validator = AudioEnergyValidator(
+ energy_threshold, source.sw, source.ch, use_channel=use_channel
+ )
+ mode = StreamTokenizer.DROP_TRAILING_SILENCE if drop_trailing_silence else 0
+ if strict_min_dur:
+ mode |= StreamTokenizer.STRICT_MIN_LENGTH
+ min_length = _duration_to_nb_windows(min_dur, analysis_window, math.ceil)
+ max_length = _duration_to_nb_windows(
+ max_dur, analysis_window, math.floor, _EPSILON
+ )
+ max_continuous_silence = _duration_to_nb_windows(
+ max_silence, analysis_window, math.floor, _EPSILON
+ )
+
+ err_msg = "({0} sec.) results in {1} analysis window(s) "
+ err_msg += "({1} == {6}({0} / {2})) which is {5} the number "
+ err_msg += "of analysis window(s) for 'max_dur' ({3} == floor({4} / {2}))"
+ if min_length > max_length:
+ err_msg = "'min_dur' " + err_msg
+ raise ValueError(
+ err_msg.format(
+ min_dur,
+ min_length,
+ analysis_window,
+ max_length,
+ max_dur,
+ "higher than",
+ "ceil",
+ )
+ )
+
+ if max_continuous_silence >= max_length:
+ err_msg = "'max_silence' " + err_msg
+ raise ValueError(
+ err_msg.format(
+ max_silence,
+ max_continuous_silence,
+ analysis_window,
+ max_length,
+ max_dur,
+ "higher or equal to",
+ "floor",
+ )
+ )
+
+ tokenizer = StreamTokenizer(
+ validator, min_length, max_length, max_continuous_silence, mode=mode
+ )
+ source.open()
+ token_gen = tokenizer.tokenize(source, generator=True)
+ region_gen = (
+ _make_audio_region(
+ token[0],
+ token[1],
+ source.block_dur,
+ source.sr,
+ source.sw,
+ source.ch,
+ )
+ for token in token_gen
+ )
+ return region_gen
+
+
+def _duration_to_nb_windows(
+ duration, analysis_window, round_fn=round, epsilon=0
+):
+ """
+ Converts a given duration into a positive integer of analysis windows.
+ If `duration / analysis_window` is not an integer, the result is
+ rounded by `round_fn` (to the nearest integer by default). If
+ `duration == 0`, returns `0`. If `duration < analysis_window`, returns 1.
+ `duration` and `analysis_window` can be in seconds or milliseconds but
+ must be in the same unit.
+
+ Parameters
+ ----------
+ duration : float
+ a given duration in seconds or ms.
+ analysis_window: float
+ size of analysis window, in the same unit as `duration`.
+ round_fn : callable
+ function called to round the result. Default: `round`.
+ epsilon : float
+ small value to add to the division result before rounding.
+ E.g., `0.3 / 0.1 = 2.9999999999999996`, when called with
+ `round_fn=math.floor` returns `2` instead of `3`. Adding a small value
+ to `0.3 / 0.1` avoids this error.
+
+ Returns
+ -------
+ nb_windows : int
+ minimum number of `analysis_window`'s to cover `duration`. That means
+ that `analysis_window * nb_windows >= duration`.
+ """
+ if duration < 0 or analysis_window <= 0:
+ err_msg = "'duration' ({}) must be >= 0 and 'analysis_window' ({}) > 0"
+ raise ValueError(err_msg.format(duration, analysis_window))
+ if duration == 0:
+ return 0
+ return int(round_fn(duration / analysis_window + epsilon))
+
+
+def _make_audio_region(
+ data_frames,
+ start_frame,
+ frame_duration,
+ sampling_rate,
+ sample_width,
+ channels,
+):
+ """
+ Helper function to create an `AudioRegion` from parameters returned by
+ tokenization object. It takes care of setting up region `start` and `end`
+ in metadata.
+
+ Parameters
+ ----------
+ frame_duration: float
+ duration of analysis window in seconds
+ start_frame : int
+ index of the first analysis window
+ sampling_rate : int
+ sampling rate of audio data
+ sample_width : int
+ number of bytes of one audio sample
+ channels : int
+ number of channels of audio data
+
+ Returns
+ -------
+ audio_region : AudioRegion
+ AudioRegion whose start time is calculated as:
+ `start_frame * frame_duration`
+ """
+ start = start_frame * frame_duration
+ data = b"".join(data_frames)
+ duration = len(data) / (sampling_rate * sample_width * channels)
+ meta = {"start": start, "end": start + duration}
+ return AudioRegion(data, sampling_rate, sample_width, channels, meta)
+
+
+def _read_chunks_online(max_read, **kwargs):
+ """
+ Helper function to read audio data from an online blocking source
+    (i.e., microphone). Used to build an `AudioRegion` and can intercept
+    KeyboardInterrupt so that reading stops as soon as this exception is
+    raised. This makes building `AudioRegion`s in [i]python sessions and
+    Jupyter notebooks more user friendly.
+
+ Parameters
+ ----------
+ max_read : float
+ maximum amount of data to read in seconds.
+ kwargs :
+ audio parameters (sampling_rate, sample_width and channels).
+
+ See also
+ --------
+    `AudioRegion.load`
+ """
+ reader = AudioReader(None, block_dur=0.5, max_read=max_read, **kwargs)
+ reader.open()
+ data = []
+ try:
+ while True:
+ frame = reader.read()
+ if frame is None:
+ break
+ data.append(frame)
+ except KeyboardInterrupt:
+ # Stop data acquisition from microphone when pressing
+ # Ctrl+C on a [i]python session or a notebook
+ pass
+ reader.close()
+ return (
+ b"".join(data),
+ reader.sampling_rate,
+ reader.sample_width,
+ reader.channels,
+ )
+
+
+def _read_offline(input, skip=0, max_read=None, **kwargs):
+ """
+    Helper function to read audio data from an offline source (i.e., a
+    file). Used to build `AudioRegion`s.
+
+ Parameters
+ ----------
+ input : str, bytes
+ path to audio file (if str), or a bytes object representing raw audio
+ data.
+ skip : float, default 0
+        amount of data to skip from the beginning of audio source.
+ max_read : float, default: None
+ maximum amount of audio data to read. Default: None, means read until
+ end of stream.
+ kwargs :
+ audio parameters (sampling_rate, sample_width and channels).
+
+ See also
+ --------
+    `AudioRegion.load`
+
+ """
+ audio_source = get_audio_source(input, **kwargs)
+ audio_source.open()
+ if skip is not None and skip > 0:
+ skip_samples = round(skip * audio_source.sampling_rate)
+ audio_source.read(skip_samples)
+ if max_read is not None:
+ if max_read < 0:
+ max_read = None
+ else:
+ max_read = round(max_read * audio_source.sampling_rate)
+ data = audio_source.read(max_read)
+ audio_source.close()
+ return (
+ data,
+ audio_source.sampling_rate,
+ audio_source.sample_width,
+ audio_source.channels,
+ )
+
+
+def _check_convert_index(index, types, err_msg):
+ if not isinstance(index, slice) or index.step is not None:
+ raise TypeError(err_msg)
+ start = index.start if index.start is not None else 0
+ stop = index.stop
+ for index in (start, stop):
+ if index is not None and not isinstance(index, types):
+ raise TypeError(err_msg)
+ return start, stop
+
+
+class _SecondsView:
+ """A class to create a view of `AudioRegion` that can be sliced using
+ indices in seconds.
+ """
+
+ def __init__(self, region):
+ self._region = region
+
+ def __getitem__(self, index):
+ err_msg = "Slicing AudioRegion by seconds requires indices of type "
+ err_msg += "'int' or 'float' without a step (e.g. region.sec[7.5:10])"
+ start_s, stop_s = _check_convert_index(index, (int, float), err_msg)
+ sr = self._region.sampling_rate
+ start_sample = int(start_s * sr)
+ stop_sample = None if stop_s is None else round(stop_s * sr)
+ return self._region[start_sample:stop_sample]
+
+ @property
+ def len(self):
+ """
+ Return region duration in seconds.
+ """
+ return self._region.duration
+
+
+class _MillisView(_SecondsView):
+ """A class to create a view of `AudioRegion` that can be sliced using
+ indices in milliseconds.
+ """
+
+ def __getitem__(self, index):
+ err_msg = (
+ "Slicing AudioRegion by milliseconds requires indices of type "
+ )
+ err_msg += "'int' without a step (e.g. region.sec[500:1500])"
+ start_ms, stop_ms = _check_convert_index(index, (int), err_msg)
+ start_sec = start_ms / 1000
+ stop_sec = None if stop_ms is None else stop_ms / 1000
+ index = slice(start_sec, stop_sec)
+ return super(_MillisView, self).__getitem__(index)
+
+ def __len__(self):
+ """
+ Return region duration in milliseconds.
+ """
+ return round(self._region.duration * 1000)
+
+ @property
+ def len(self):
+ """
+ Return region duration in milliseconds.
+ """
+ return len(self)
+
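# Editor's note: an illustrative sketch (not part of the diff) of the
# seconds/milliseconds views on a 2-second, 8 kHz, 16-bit mono region
# (AudioRegion is defined further down in this diff).
from auditok import AudioRegion

region = AudioRegion(b"\x00" * 2 * 8000 * 2, 8000, 2, 1)
assert region.sec.len == 2.0                  # duration in seconds
assert region.ms.len == 2000                  # duration in milliseconds
assert len(region) == 16000                   # length in samples
assert region.sec[0:1] == region.ms[0:1000]   # same first second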
+
+class _AudioRegionMetadata(dict):
+ """A class to store `AudioRegion`'s metadata."""
+
+ def __getattr__(self, name):
+ if name in self:
+ return self[name]
+ else:
+ err_msg = "AudioRegion metadata has no entry '{}'"
+ raise AttributeError(err_msg.format(name))
+
+ def __setattr__(self, name, value):
+ self[name] = value
+
+ def __str__(self):
+ return "\n".join("{}: {}".format(k, v) for k, v in self.items())
+
+ def __repr__(self):
+ return str(self)
+
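# Editor's note: sketch (not part of the diff) of attribute-style access
# on _AudioRegionMetadata.
from auditok.core import _AudioRegionMetadata

meta = _AudioRegionMetadata({"start": 1.25, "end": 2.75})
assert meta.start == meta["start"] == 1.25
meta.source = "mic"              # attribute assignment writes to the dict
assert meta["source"] == "mic"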
+
+class AudioRegion(object):
+ """
+ AudioRegion encapsulates raw audio data and provides an interface to
+ perform simple operations on it. Use `AudioRegion.load` to build an
+ `AudioRegion` from different types of objects.
+
+ Parameters
+ ----------
+ data : bytes
+ raw audio data as a bytes object
+ sampling_rate : int
+ sampling rate of audio data
+ sample_width : int
+ number of bytes of one audio sample
+ channels : int
+ number of channels of audio data
+ meta : dict, default: None
+ any collection of <key:value> elements used to build metadata for
+ this `AudioRegion`. Meta data can be accessed via `region.meta.key`
+ if `key` is a valid python attribute name, or via `region.meta[key]`
+        if not. Note that the :func:`split` function (or the
+        :meth:`AudioRegion.split` method) returns `AudioRegion`s with
+        ``start`` and ``end`` meta values that indicate the location in
+        seconds of the region in the original audio data.
+
+ See also
+ --------
+ AudioRegion.load
+
+ """
+
+ def __init__(self, data, sampling_rate, sample_width, channels, meta=None):
+ check_audio_data(data, sample_width, channels)
+ self._data = data
+ self._sampling_rate = sampling_rate
+ self._sample_width = sample_width
+ self._channels = channels
+ self._samples = None
+ self.splitp = self.split_and_plot
+
+ if meta is not None:
+ self._meta = _AudioRegionMetadata(meta)
+ else:
+ self._meta = None
+
+ self._seconds_view = _SecondsView(self)
+ self.sec = self.seconds
+ self.s = self.seconds
+
+ self._millis_view = _MillisView(self)
+ self.ms = self.millis
+
+ @property
+ def meta(self):
+ return self._meta
+
+ @meta.setter
+ def meta(self, new_meta):
+ """Meta data of audio region."""
+ self._meta = _AudioRegionMetadata(new_meta)
+
+ @classmethod
+ def load(cls, input, skip=0, max_read=None, **kwargs):
+ """
+ Create an `AudioRegion` by loading data from `input`. See :func:`load`
+        for parameters description.
+
+ Returns
+ -------
+ region: AudioRegion
+
+ Raises
+ ------
+ ValueError
+            raised if `input` is None while `skip` != 0 or `max_read`
+            is None.
+ """
+ if input is None:
+ if skip > 0:
+ raise ValueError(
+ "'skip' should be 0 when reading from microphone"
+ )
+ if max_read is None or max_read < 0:
+ raise ValueError(
+ "'max_read' should not be None when reading from "
+ "microphone"
+ )
+ data, sampling_rate, sample_width, channels = _read_chunks_online(
+ max_read, **kwargs
+ )
+ else:
+ data, sampling_rate, sample_width, channels = _read_offline(
+ input, skip=skip, max_read=max_read, **kwargs
+ )
+
+ return cls(data, sampling_rate, sample_width, channels)
+
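# Editor's note: typical AudioRegion.load calls, sketched for
# illustration (not part of the diff); 'audio.wav' is a hypothetical
# file path.
from auditok import AudioRegion

region = AudioRegion.load("audio.wav", skip=1.0, max_read=5.0)
# Microphone input needs an explicit bounded duration (and PyAudio):
# region = AudioRegion.load(None, max_read=10)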
+ @property
+ def seconds(self):
+ """
+ A view to slice audio region by seconds (using ``region.seconds[start:end]``).
+ """
+ return self._seconds_view
+
+ @property
+ def millis(self):
+ """A view to slice audio region by milliseconds (using ``region.millis[start:end]``)."""
+ return self._millis_view
+
+ @property
+ def duration(self):
+ """
+ Returns region duration in seconds.
+ """
+ return len(self._data) / (
+ self.sampling_rate * self.sample_width * self.channels
+ )
+
+ @property
+ def sampling_rate(self):
+ """Samling rate of audio data."""
+ return self._sampling_rate
+
+ @property
+ def sr(self):
+ """Samling rate of audio data, alias for `sampling_rate`."""
+ return self._sampling_rate
+
+ @property
+ def sample_width(self):
+ """Number of bytes per sample, one channel considered."""
+ return self._sample_width
+
+ @property
+ def sw(self):
+ """Number of bytes per sample, alias for `sampling_rate`."""
+ return self._sample_width
+
+ @property
+ def channels(self):
+ """Number of channels of audio data."""
+ return self._channels
+
+ @property
+ def ch(self):
+ """Number of channels of audio data, alias for `channels`."""
+ return self._channels
+
+ def play(self, progress_bar=False, player=None, **progress_bar_kwargs):
+ """
+ Play audio region.
+
+ Parameters
+ ----------
+ progress_bar : bool, default: False
+ whether to use a progress bar while playing audio. Default: False.
+            `progress_bar` requires `tqdm`; if it is not installed, no
+            progress bar will be shown.
+        player : AudioPlayer, default: None
+            audio player to use. If None (default), use `player_for()`
+            to get a new audio player.
+ progress_bar_kwargs : kwargs
+ keyword arguments to pass to `tqdm` progress_bar builder (e.g.,
+ use `leave=False` to clean up the screen when play finishes).
+ """
+ if player is None:
+ player = player_for(self)
+ player.play(
+ self._data, progress_bar=progress_bar, **progress_bar_kwargs
+ )
+
+ def save(self, file, audio_format=None, exists_ok=True, **audio_parameters):
+ """
+ Save audio region to file.
-from auditok.util import DataValidator
+ Parameters
+ ----------
+ file : str
+ path to output audio file. May contain `{duration}` placeholder
+            as well as any placeholder that this region's metadata might
+            contain (e.g., regions returned by `split` have metadata with
+            `start` and `end` attributes that can be used to build the
+            output file name as `{meta.start}` and `{meta.end}`). See the
+            examples below for placeholders with format specifiers.
-__all__ = ["StreamTokenizer"]
+ audio_format : str, default: None
+ format used to save audio data. If None (default), format is guessed
+ from file name's extension. If file name has no extension, audio
+ data is saved as a raw (headerless) audio file.
+ exists_ok : bool, default: True
+ If True, overwrite `file` if a file with the same name exists.
+            If False, raise a `FileExistsError` if `file` exists.
+ audio_parameters: dict
+ any keyword arguments to be passed to audio saving backend.
+ Returns
+ -------
+ file: str
+            name of output file with placeholders replaced.
+        Raises
+        ------
+        FileExistsError
+            if `file` exists and `exists_ok` is False.
-class StreamTokenizer():
+
+ Examples
+ --------
+ >>> region = AudioRegion(b'\\0' * 2 * 24000,
+ >>> sampling_rate=16000,
+ >>> sample_width=2,
+ >>> channels=1)
+        >>> region.meta = {'start': 2.25}
+        >>> region.meta.end = 2.25 + region.duration
+        >>> region.save('audio_{meta.start}-{meta.end}.wav')
+        'audio_2.25-3.75.wav'
+        >>> region.save('region_{meta.start:.3f}_{duration:.3f}.wav')
+        'region_2.250_1.500.wav'
+ """
+ if isinstance(file, str):
+ file = file.format(duration=self.duration, meta=self.meta)
+ if not exists_ok and os.path.exists(file):
+ raise FileExistsError("file '{file}' exists".format(file=file))
+ to_file(
+ self._data,
+ file,
+ audio_format,
+ sr=self.sr,
+ sw=self.sw,
+ ch=self.ch,
+ audio_parameters=audio_parameters,
+ )
+ return file
+
+ def split(
+ self,
+ min_dur=0.2,
+ max_dur=5,
+ max_silence=0.3,
+ drop_trailing_silence=False,
+ strict_min_dur=False,
+ **kwargs
+ ):
+ """Split audio region. See :func:`auditok.split()` for a comprehensive
+ description of split parameters.
+        See Also :meth:`AudioRegion.split_and_plot`.
+ """
+ if kwargs.get("max_read", kwargs.get("mr")) is not None:
+ warn_msg = "'max_read' (or 'mr') should not be used with "
+ warn_msg += "AudioRegion.split_and_plot(). You should rather "
+ warn_msg += "slice audio region before calling this method"
+ raise RuntimeWarning(warn_msg)
+ return split(
+ self,
+ min_dur=min_dur,
+ max_dur=max_dur,
+ max_silence=max_silence,
+ drop_trailing_silence=drop_trailing_silence,
+ strict_min_dur=strict_min_dur,
+ **kwargs
+ )
+
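# Editor's note: an illustrative use of AudioRegion.split (not part of
# the diff); 'speech.wav' and the parameter values are hypothetical.
from auditok import AudioRegion

for r in AudioRegion.load("speech.wav").split(min_dur=0.3, max_dur=8,
                                              max_silence=0.2):
    print(r.meta.start, r.meta.end, r.duration)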
+ def plot(
+ self,
+ scale_signal=True,
+ show=True,
+ figsize=None,
+ save_as=None,
+ dpi=120,
+ theme="auditok",
+ ):
+ """Plot audio region, one sub-plot for each channel.
+
+ Parameters
+ ----------
+ scale_signal : bool, default: True
+            if True, scale the signal by subtracting its mean and dividing
+            by its standard deviation before plotting.
+ show : bool
+ whether to show plotted signal right after the call.
+ figsize : tuple, default: None
+ width and height of the figure to pass to `matplotlib`.
+        save_as : str, default: None
+ if provided, also save plot to file.
+ dpi : int, default: 120
+ plot dpi to pass to `matplotlib`.
+ theme : str or dict, default: "auditok"
+            plot theme to use. Currently only the "auditok" theme is
+            implemented. To provide your own theme, see
+            :attr:`auditok.plotting.AUDITOK_PLOT_THEME`.
+ """
+ try:
+ from auditok.plotting import plot
+
+ plot(
+ self,
+ scale_signal=scale_signal,
+ show=show,
+ figsize=figsize,
+ save_as=save_as,
+ dpi=dpi,
+ theme=theme,
+ )
+ except ImportError:
+ raise RuntimeWarning("Plotting requires matplotlib")
+
+ def split_and_plot(
+ self,
+ min_dur=0.2,
+ max_dur=5,
+ max_silence=0.3,
+ drop_trailing_silence=False,
+ strict_min_dur=False,
+ scale_signal=True,
+ show=True,
+ figsize=None,
+ save_as=None,
+ dpi=120,
+ theme="auditok",
+ **kwargs
+ ):
+ """Split region and plot signal and detections. Alias: :meth:`splitp`.
+ See :func:`auditok.split()` for a comprehensive description of split
+ parameters. Also see :meth:`plot` for plot parameters.
+ """
+ try:
+ from auditok.plotting import plot
+
+ regions = self.split(
+ min_dur=min_dur,
+ max_dur=max_dur,
+ max_silence=max_silence,
+ drop_trailing_silence=drop_trailing_silence,
+ strict_min_dur=strict_min_dur,
+ **kwargs
+ )
+ regions = list(regions)
+ detections = ((reg.meta.start, reg.meta.end) for reg in regions)
+ eth = kwargs.get(
+ "energy_threshold", kwargs.get("eth", DEFAULT_ENERGY_THRESHOLD)
+ )
+ plot(
+ self,
+ scale_signal=scale_signal,
+ detections=detections,
+ energy_threshold=eth,
+ show=show,
+ figsize=figsize,
+ save_as=save_as,
+ dpi=dpi,
+ theme=theme,
+ )
+ return regions
+ except ImportError:
+ raise RuntimeWarning("Plotting requires matplotlib")
+
+ def __array__(self):
+ return self.samples
+
+ @property
+ def samples(self):
+ """Audio region as arrays of samples, one array per channel."""
+ if self._samples is None:
+ self._samples = signal.to_array(
+ self._data, self.sample_width, self.channels
+ )
+ return self._samples
+
+ def __len__(self):
+ """
+ Return region length in number of samples.
+ """
+ return len(self._data) // (self.sample_width * self.channels)
+
+ @property
+ def len(self):
+ """
+ Return region length in number of samples.
+ """
+ return len(self)
+
+ def __bytes__(self):
+ return self._data
+
+ def __str__(self):
+ return (
+ "AudioRegion(duration={:.3f}, "
+ "sampling_rate={}, sample_width={}, channels={})".format(
+ self.duration, self.sr, self.sw, self.ch
+ )
+ )
+
+ def __repr__(self):
+ return str(self)
+
+ def __add__(self, other):
+ """
+        Concatenate this region and `other` and return a new region.
+ Both regions must have the same sampling rate, sample width
+ and number of channels. If not, raises a `ValueError`.
+ """
+ if not isinstance(other, AudioRegion):
+ raise TypeError(
+ "Can only concatenate AudioRegion, "
+ 'not "{}"'.format(type(other))
+ )
+ if other.sr != self.sr:
+ raise ValueError(
+ "Can only concatenate AudioRegions of the same "
+ "sampling rate ({} != {})".format(self.sr, other.sr)
+ )
+ if other.sw != self.sw:
+ raise ValueError(
+ "Can only concatenate AudioRegions of the same "
+ "sample width ({} != {})".format(self.sw, other.sw)
+ )
+ if other.ch != self.ch:
+ raise ValueError(
+ "Can only concatenate AudioRegions of the same "
+ "number of channels ({} != {})".format(self.ch, other.ch)
+ )
+ data = self._data + other._data
+ return AudioRegion(data, self.sr, self.sw, self.ch)
+
+ def __radd__(self, other):
+ """
+        Concatenate `other` and this region. `other` should be an
+        `AudioRegion` with the same audio parameters as this region,
+        but can exceptionally be `0` to make it possible to concatenate
+        many regions with `sum`.
+ """
+ if other == 0:
+ return self
+ return other.add(self)
+
+ def __mul__(self, n):
+ if not isinstance(n, int):
+ err_msg = "Can't multiply AudioRegion by a non-int of type '{}'"
+ raise TypeError(err_msg.format(type(n)))
+ data = self._data * n
+ return AudioRegion(data, self.sr, self.sw, self.ch)
+
+ def __rmul__(self, n):
+ return self * n
+
+ def __truediv__(self, n):
+ if not isinstance(n, int) or n <= 0:
+ raise TypeError("AudioRegion can only be divided by a positive int")
+ samples_per_sub_region, rest = divmod(len(self), n)
+ onset = 0
+ sub_regions = []
+ while onset < len(self):
+ offset = 0
+ if rest > 0:
+ offset = 1
+ rest -= 1
+ offset += onset + samples_per_sub_region
+ sub_regions.append(self[onset:offset])
+ onset = offset
+ return sub_regions
+
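# Editor's note: sketch (not part of the diff) of the arithmetic
# operators defined on AudioRegion.
from auditok import AudioRegion

r = AudioRegion(b"\x00" * 3200, 16000, 2, 1)   # 0.1 s of silence
assert (r + r).duration == 0.2                 # concatenation
assert sum([r, r, r]).duration == 0.3          # __radd__ makes sum() work
assert len((2 * r) / 4) == 4                   # four equal sub-regions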
+ def __eq__(self, other):
+ if other is self:
+ return True
+ if not isinstance(other, AudioRegion):
+ return False
+ return (
+ (self._data == other._data)
+ and (self.sr == other.sr)
+ and (self.sw == other.sw)
+ and (self.ch == other.ch)
+ )
+
+ def __getitem__(self, index):
+ err_msg = "Slicing AudioRegion by samples requires indices of type "
+ err_msg += "'int' without a step (e.g. region.sec[1600:3200])"
+ start_sample, stop_sample = _check_convert_index(index, (int), err_msg)
+
+ bytes_per_sample = self.sample_width * self.channels
+ len_samples = len(self._data) // bytes_per_sample
+
+ if start_sample < 0:
+ start_sample = max(start_sample + len_samples, 0)
+ onset = start_sample * bytes_per_sample
+
+ if stop_sample is not None:
+ if stop_sample < 0:
+ stop_sample = max(stop_sample + len_samples, 0)
+                offset = stop_sample * bytes_per_sample
+ else:
+ offset = None
+
+ data = self._data[onset:offset]
+ return AudioRegion(data, self.sr, self.sw, self.ch)
+
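# Editor's note: sketch (not part of the diff). Sample-level slicing
# accepts negative indices, like regular Python sequences.
from auditok import AudioRegion

r = AudioRegion(b"\x00" * 2 * 16000, 16000, 2, 1)   # 1 s, 16-bit mono
assert r[-4000:].duration == 0.25                   # last quarter second
assert r[:8000] == r.ms[:500]                       # same first half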
+
+class StreamTokenizer:
"""
Class for stream tokenizers. It implements a 4-state automaton scheme
to extract sub-sequences of interest on the fly.
-
- :Parameters:
-
- `validator` :
- instance of `DataValidator` that implements `is_valid` method.
-
- `min_length` : *(int)*
- Minimum number of frames of a valid token. This includes all \
- tolerated non valid frames within the token.
-
- `max_length` : *(int)*
- Maximum number of frames of a valid token. This includes all \
- tolerated non valid frames within the token.
-
- `max_continuous_silence` : *(int)*
- Maximum number of consecutive non-valid frames within a token.
- Note that, within a valid token, there may be many tolerated \
- *silent* regions that contain each a number of non valid frames up to \
- `max_continuous_silence`
-
- `init_min` : *(int, default=0)*
- Minimum number of consecutive valid frames that must be **initially** \
- gathered before any sequence of non valid frames can be tolerated. This
- option is not always needed, it can be used to drop non-valid tokens as
- early as possible. **Default = 0** means that the option is by default
- ineffective.
-
- `init_max_silence` : *(int, default=0)*
- Maximum number of tolerated consecutive non-valid frames if the \
- number already gathered valid frames has not yet reached 'init_min'.
- This argument is normally used if `init_min` is used. **Default = 0**,
- by default this argument is not taken into consideration.
-
- `mode` : *(int, default=0)*
- `mode` can be:
-
- 1. `StreamTokenizer.STRICT_MIN_LENGTH`:
- if token *i* is delivered because `max_length`
- is reached, and token *i+1* is immediately adjacent to
- token *i* (i.e. token *i* ends at frame *k* and token *i+1* starts
- at frame *k+1*) then accept token *i+1* only of it has a size of at
- least `min_length`. The default behavior is to accept token *i+1*
- event if it is shorter than `min_length` (given that the above conditions
- are fulfilled of course).
-
- :Examples:
-
- In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
- accepted although it is shorter than `min_length` (3), because it immediately
- follows the latest delivered token:
-
- .. code:: python
-
- from auditok import StreamTokenizer, StringDataSource, DataValidator
-
- class UpperCaseChecker(DataValidator):
- def is_valid(self, frame):
- return frame.isupper()
-
-
- dsource = StringDataSource("aaaAAAABBbbb")
- tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
- min_length=3,
- max_length=4,
- max_continuous_silence=0)
-
- tokenizer.tokenize(dsource)
-
-
- :output:
-
- .. code:: python
-
- [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
-
-
- The following tokenizer will however reject the 'BB' token:
-
- .. code:: python
-
- dsource = StringDataSource("aaaAAAABBbbb")
- tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
- min_length=3, max_length=4,
- max_continuous_silence=0,
- mode=StreamTokenizer.STRICT_MIN_LENGTH)
- tokenizer.tokenize(dsource)
-
- :output:
-
- .. code:: python
-
- [(['A', 'A', 'A', 'A'], 3, 6)]
-
-
- 2. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all tailing non-valid frames
- from a token to be delivered if and only if it is not **truncated**.
- This can be a bit tricky. A token is actually delivered if:
-
- - a. `max_continuous_silence` is reached
-
- :or:
-
- - b. Its length reaches `max_length`. This is called a **truncated** token
-
- In the current implementation, a `StreamTokenizer`'s decision is only based on already seen
- data and on incoming data. Thus, if a token is truncated at a non-valid but tolerated
- frame (`max_length` is reached but `max_continuous_silence` not yet) any tailing
- silence will be kept because it can potentially be part of valid token (if `max_length`
- was bigger). But if `max_continuous_silence` is reached before `max_length`, the delivered
- token will not be considered as truncated but a result of *normal* end of detection
- (i.e. no more valid data). In that case the tailing silence can be removed if you use
- the `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
-
- :Example:
-
- .. code:: python
-
- tokenizer = StreamTokenizer(validator=UpperCaseChecker(), min_length=3,
- max_length=6, max_continuous_silence=3,
- mode=StreamTokenizer.DROP_TRAILING_SILENCE)
-
- dsource = StringDataSource("aaaAAAaaaBBbbbb")
- tokenizer.tokenize(dsource)
-
- :output:
-
- .. code:: python
-
- [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
-
- The first token is delivered with its tailing silence because it is truncated
- while the second one has its tailing frames removed.
-
- Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:
-
- .. code:: python
-
- [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B', 'b', 'b', 'b'], 9, 13)]
-
-
-
- 3. `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`:
- use both options. That means: first remove tailing silence, then ckeck if the
- token still has at least a length of `min_length`.
+
+ Parameters
+ ----------
+ validator : callable, DataValidator (must implement `is_valid`)
+ called with each data frame read from source. Should take one positional
+ argument and return True or False for valid and invalid frames
+ respectively.
+
+ min_length : int
+ Minimum number of frames of a valid token. This includes all
+ tolerated non valid frames within the token.
+
+ max_length : int
+ Maximum number of frames of a valid token. This includes all
+ tolerated non valid frames within the token.
+
+ max_continuous_silence : int
+ Maximum number of consecutive non-valid frames within a token.
+        Note that, within a valid token, there may be many tolerated
+        *silent* regions, each containing up to `max_continuous_silence`
+        non-valid frames.
+
+ init_min : int
+ Minimum number of consecutive valid frames that must be
+ **initially** gathered before any sequence of non valid frames can
+ be tolerated. This option is not always needed, it can be used to
+ drop non-valid tokens as early as possible. **Default = 0** means
+ that the option is by default ineffective.
+
+ init_max_silence : int
+        Maximum number of tolerated consecutive non-valid frames if the
+        number of already gathered valid frames has not yet reached
+        `init_min`. This argument is normally used if `init_min` is used.
+ **Default = 0**, by default this argument is not taken into
+ consideration.
+
+ mode : int
+ mode can be one of the following:
+
+        1. `StreamTokenizer.NORMAL`: do not drop trailing silence, and
+ accept a token shorter than `min_length` if it is the continuation
+ of the latest delivered token.
+
+        2. `StreamTokenizer.STRICT_MIN_LENGTH`: if token `i` is delivered
+ because `max_length` is reached, and token `i+1` is immediately
+ adjacent to token `i` (i.e. token `i` ends at frame `k` and token
+        `i+1` starts at frame `k+1`) then accept token `i+1` only if it
+        has a size of at least `min_length`. The default behavior is to
+        accept token `i+1` even if it is shorter than `min_length`
+        (provided that the above conditions are fulfilled, of course).
+
+        3. `StreamTokenizer.DROP_TRAILING_SILENCE`: drop all trailing
+ non-valid frames from a token to be delivered if and only if it
+ is not **truncated**. This can be a bit tricky. A token is actually
+ delivered if:
+
+ - `max_continuous_silence` is reached.
+
+ - Its length reaches `max_length`. This is referred to as a
+ **truncated** token.
+
+ In the current implementation, a `StreamTokenizer`'s decision is only
+ based on already seen data and on incoming data. Thus, if a token is
+ truncated at a non-valid but tolerated frame (`max_length` is reached
+        but `max_continuous_silence` not yet) any trailing silence will be kept
+        because it can potentially be part of a valid token (if `max_length` was
+ bigger). But if `max_continuous_silence` is reached before
+ `max_length`, the delivered token will not be considered as truncated
+ but a result of *normal* end of detection (i.e. no more valid data).
+ In that case the trailing silence can be removed if you use the
+ `StreamTokenizer.DROP_TRAILING_SILENCE` mode.
+
+        4. `(StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE)`:
+        use both options. That means: first remove trailing silence, then
+        check if the token still has a length of at least `min_length`.
+
+ Examples
+ --------
+
+ In the following code, without `STRICT_MIN_LENGTH`, the 'BB' token is
+ accepted although it is shorter than `min_length` (3), because it
+ immediately follows the latest delivered token:
+
+ >>> from auditok.core import StreamTokenizer
+    >>> from auditok.util import StringDataSource, DataValidator
+
+ >>> class UpperCaseChecker(DataValidator):
+ >>> def is_valid(self, frame):
+    >>>         return frame.isupper()
+ >>> dsource = StringDataSource("aaaAAAABBbbb")
+ >>> tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+ min_length=3,
+ max_length=4,
+ max_continuous_silence=0)
+ >>> tokenizer.tokenize(dsource)
+ [(['A', 'A', 'A', 'A'], 3, 6), (['B', 'B'], 7, 8)]
+
+
+ The following tokenizer will however reject the 'BB' token:
+
+ >>> dsource = StringDataSource("aaaAAAABBbbb")
+ >>> tokenizer = StreamTokenizer(validator=UpperCaseChecker(),
+ min_length=3, max_length=4,
+ max_continuous_silence=0,
+ mode=StreamTokenizer.STRICT_MIN_LENGTH)
+ >>> tokenizer.tokenize(dsource)
+ [(['A', 'A', 'A', 'A'], 3, 6)]
+
+    The following tokenizer demonstrates `DROP_TRAILING_SILENCE`:
+
+ >>> tokenizer = StreamTokenizer(
+ >>> validator=UpperCaseChecker(),
+ >>> min_length=3,
+ >>> max_length=6,
+ >>> max_continuous_silence=3,
+ >>> mode=StreamTokenizer.DROP_TRAILING_SILENCE
+ >>> )
+ >>> dsource = StringDataSource("aaaAAAaaaBBbbbb")
+ >>> tokenizer.tokenize(dsource)
+ [(['A', 'A', 'A', 'a', 'a', 'a'], 3, 8), (['B', 'B'], 9, 10)]
+
+    The first token is delivered with its trailing silence because it is
+    truncated while the second one has its trailing frames removed.
+
+ Without `StreamTokenizer.DROP_TRAILING_SILENCE` the output would be:
+
+ .. code:: python
+
+ [
+ (['A', 'A', 'A', 'a', 'a', 'a'], 3, 8),
+ (['B', 'B', 'b', 'b', 'b'], 9, 13)
+ ]
+
"""
-
-
+
SILENCE = 0
POSSIBLE_SILENCE = 1
- POSSIBLE_NOISE = 2
+ POSSIBLE_NOISE = 2
NOISE = 3
-
+ NORMAL = 0
STRICT_MIN_LENGTH = 2
DROP_TRAILING_SILENCE = 4
- # alias
- DROP_TAILING_SILENCE = 4
-
- def __init__(self, validator,
- min_length, max_length, max_continuous_silence,
- init_min=0, init_max_silence=0,
- mode=0):
-
- if not isinstance(validator, DataValidator):
- raise TypeError("'validator' must be an instance of 'DataValidator'")
-
+
+ def __init__(
+ self,
+ validator,
+ min_length,
+ max_length,
+ max_continuous_silence,
+ init_min=0,
+ init_max_silence=0,
+ mode=0,
+ ):
+ if callable(validator):
+ self._is_valid = validator
+ elif isinstance(validator, DataValidator):
+ self._is_valid = validator.is_valid
+ else:
+ raise TypeError(
+ "'validator' must be a callable or an instance of "
+ "DataValidator"
+ )
+
if max_length <= 0:
- raise ValueError("'max_length' must be > 0 (value={0})".format(max_length))
-
+ raise ValueError(
+ "'max_length' must be > 0 (value={0})".format(max_length)
+ )
+
if min_length <= 0 or min_length > max_length:
- raise ValueError("'min_length' must be > 0 and <= 'max_length' (value={0})".format(min_length))
-
+ err_msg = "'min_length' must be > 0 and <= 'max_length' (value={0})"
+ raise ValueError(err_msg.format(min_length))
+
if max_continuous_silence >= max_length:
- raise ValueError("'max_continuous_silence' must be < 'max_length' (value={0})".format(max_continuous_silence))
-
+ err_msg = "'max_continuous_silence' must be < 'max_length' "
+ err_msg += "(value={0})"
+ raise ValueError(err_msg.format(max_continuous_silence))
+
if init_min >= max_length:
- raise ValueError("'init_min' must be < 'max_length' (value={0})".format(max_continuous_silence))
-
+ raise ValueError(
+ "'init_min' must be < 'max_length' (value={0})".format(
+ max_continuous_silence
+ )
+ )
+
self.validator = validator
self.min_length = min_length
self.max_length = max_length
self.max_continuous_silence = max_continuous_silence
self.init_min = init_min
self.init_max_silent = init_max_silence
-
- self._mode = None
- self.set_mode(mode)
- self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
- self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
-
+ self._set_mode(mode)
self._deliver = None
self._tokens = None
self._state = None
self._data = None
self._contiguous_token = False
-
self._init_count = 0
self._silence_length = 0
self._start_frame = 0
self._current_frame = 0
-
- def set_mode(self, mode):
- """
- :Parameters:
-
- `mode` : *(int)*
- New mode, must be one of:
-
-
- - `StreamTokenizer.STRICT_MIN_LENGTH`
-
- - `StreamTokenizer.DROP_TRAILING_SILENCE`
-
- - `StreamTokenizer.STRICT_MIN_LENGTH | StreamTokenizer.DROP_TRAILING_SILENCE`
-
- - `0`
-
- See `StreamTokenizer.__init__` for more information about the mode.
- """
-
- if not mode in [self.STRICT_MIN_LENGTH, self.DROP_TRAILING_SILENCE,
- self.STRICT_MIN_LENGTH | self.DROP_TRAILING_SILENCE, 0]:
-
+
+ def _set_mode(self, mode):
+ strict_min_and_drop_trailing = StreamTokenizer.STRICT_MIN_LENGTH
+ strict_min_and_drop_trailing |= StreamTokenizer.DROP_TRAILING_SILENCE
+ if mode not in [
+ StreamTokenizer.NORMAL,
+ StreamTokenizer.STRICT_MIN_LENGTH,
+ StreamTokenizer.DROP_TRAILING_SILENCE,
+ strict_min_and_drop_trailing,
+ ]:
raise ValueError("Wrong value for mode")
-
self._mode = mode
self._strict_min_length = (mode & self.STRICT_MIN_LENGTH) != 0
- self._drop_tailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
-
-
- def get_mode(self):
- """
- Return the current mode. To check whether a specific mode is activated use
- the bitwise 'and' operator `&`. Example:
-
- .. code:: python
-
- if mode & self.STRICT_MIN_LENGTH != 0:
- do_something()
- """
- return self._mode
-
+ self._drop_trailing_silence = (mode & self.DROP_TRAILING_SILENCE) != 0
+
def _reinitialize(self):
self._contiguous_token = False
self._data = []
@@ -266,112 +1269,114 @@ class StreamTokenizer():
self._state = self.SILENCE
self._current_frame = -1
self._deliver = self._append_token
-
-
- def tokenize(self, data_source, callback=None):
+
+ def tokenize(self, data_source, callback=None, generator=False):
"""
- Read data from `data_source`, one frame a time, and process the read frames in
- order to detect sequences of frames that make up valid tokens.
-
+        Read data from `data_source`, one frame at a time, and process
+        the read frames in order to detect sequences of frames that make
+        up valid tokens.
+
:Parameters:
- `data_source` : instance of the :class:`DataSource` class that implements a `read` method.
- 'read' should return a slice of signal, i.e. frame (of whatever \
- type as long as it can be processed by validator) and None if \
- there is no more signal.
-
+ `data_source` : instance of the :class:`DataSource` class that
+ implements a `read` method. 'read' should return a slice of
+ signal, i.e. frame (of whatever type as long as it can be
+ processed by validator) and None if there is no more signal.
+
`callback` : an optional 3-argument function.
- If a `callback` function is given, it will be called each time a valid token
- is found.
-
-
+ If a `callback` function is given, it will be called each time
+        a valid token is found.
+
+    `generator` : bool, default: False
+        If True, return a generator of tokens instead of a list.
+
+
:Returns:
- A list of tokens if `callback` is None. Each token is tuple with the following elements:
-
+    A list of tokens if `callback` is None, or a generator of tokens if
+    `generator` is True. Each token is a tuple with the following
+    elements:
+
    .. code:: python
-
+
(data, start, end)
-
- where `data` is a list of read frames, `start`: index of the first frame in the
- original data and `end` : index of the last frame.
-
+
+ where `data` is a list of read frames, `start`: index of the first
+ frame in the original data and `end` : index of the last frame.
"""
-
+ token_gen = self._iter_tokens(data_source)
+ if callback:
+ for token in token_gen:
+ callback(*token)
+ return
+ if generator:
+ return token_gen
+ return list(token_gen)
+
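# Editor's note: an illustrative sketch (not part of the diff) of the
# three delivery modes of tokenize(). Since validators may now be plain
# callables, str.isupper stands in for the UpperCaseChecker class used
# in the docstring above.
from auditok.core import StreamTokenizer
from auditok.util import StringDataSource

tokenizer = StreamTokenizer(validator=str.isupper, min_length=3,
                            max_length=4, max_continuous_silence=0)
# 1. eager: a list of (data, start, end) tuples
tokens = tokenizer.tokenize(StringDataSource("aaaAAAABBbbb"))
# 2. lazy: a generator that yields tokens as frames are consumed
gen = tokenizer.tokenize(StringDataSource("aaaAAAABBbbb"), generator=True)
# 3. callback: called once per token; tokenize() then returns None
tokenizer.tokenize(StringDataSource("aaaAAAABBbbb"),
                   callback=lambda data, start, end: print(start, end))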
+ def _iter_tokens(self, data_source):
self._reinitialize()
-
- if callback is not None:
- self._deliver = callback
-
while True:
- frame = data_source.read()
+ frame = data_source.read()
+ self._current_frame += 1
if frame is None:
+ token = self._post_process()
+ if token is not None:
+ yield token
break
- self._current_frame += 1
- self._process(frame)
-
- self._post_process()
-
- if callback is None:
- _ret = self._tokens
- self._tokens = None
- return _ret
-
-
- def _process(self, frame):
-
- frame_is_valid = self.validator.is_valid(frame)
-
+ token = self._process(frame)
+ if token is not None:
+ yield token
+
+ def _process(self, frame): # noqa: C901
+
+ frame_is_valid = self._is_valid(frame)
+
if self._state == self.SILENCE:
-
+
if frame_is_valid:
# seems we got a valid frame after a silence
self._init_count = 1
self._silence_length = 0
self._start_frame = self._current_frame
self._data.append(frame)
-
- if self._init_count >= self.init_min:
+
+ if self._init_count >= self.init_min:
self._state = self.NOISE
if len(self._data) >= self.max_length:
- self._process_end_of_detection(True)
+ return self._process_end_of_detection(True)
else:
self._state = self.POSSIBLE_NOISE
-
+
elif self._state == self.POSSIBLE_NOISE:
-
+
if frame_is_valid:
self._silence_length = 0
self._init_count += 1
self._data.append(frame)
- if self._init_count >= self.init_min:
+ if self._init_count >= self.init_min:
self._state = self.NOISE
if len(self._data) >= self.max_length:
- self._process_end_of_detection(True)
-
- else:
+ return self._process_end_of_detection(True)
+
+ else:
self._silence_length += 1
- if self._silence_length > self.init_max_silent or \
- len(self._data) + 1 >= self.max_length:
+ if (
+ self._silence_length > self.init_max_silent
+ or len(self._data) + 1 >= self.max_length
+ ):
# either init_max_silent or max_length is reached
# before _init_count, back to silence
self._data = []
self._state = self.SILENCE
else:
self._data.append(frame)
-
-
+
elif self._state == self.NOISE:
-
+
if frame_is_valid:
self._data.append(frame)
if len(self._data) >= self.max_length:
- self._process_end_of_detection(True)
-
- elif self.max_continuous_silence <= 0 :
- # max token reached at this frame will _deliver if _contiguous_token
- # and not _strict_min_length
- self._process_end_of_detection()
+ return self._process_end_of_detection(True)
+
+ elif self.max_continuous_silence <= 0:
+                # no silence tolerated: this non-valid frame ends the
+                # token, which is delivered if it has min_length frames
+                # or if it continues a truncated token (_contiguous_token)
+                # and STRICT_MIN_LENGTH is not set
self._state = self.SILENCE
-
+ return self._process_end_of_detection()
else:
# this is the first silent frame following a valid one
# and it is tolerated
@@ -379,61 +1384,63 @@ class StreamTokenizer():
self._data.append(frame)
self._state = self.POSSIBLE_SILENCE
if len(self._data) == self.max_length:
- self._process_end_of_detection(True)
- # don't reset _silence_length because we still
+ return self._process_end_of_detection(True)
+ # don't reset _silence_length because we still
# need to know the total number of silent frames
-
-
-
+
elif self._state == self.POSSIBLE_SILENCE:
-
+
if frame_is_valid:
self._data.append(frame)
self._silence_length = 0
self._state = self.NOISE
if len(self._data) >= self.max_length:
- self._process_end_of_detection(True)
-
+ return self._process_end_of_detection(True)
+
else:
if self._silence_length >= self.max_continuous_silence:
- if self._silence_length < len(self._data):
- # _deliver only gathered frames aren't all silent
- self._process_end_of_detection()
- else:
- self._data = []
self._state = self.SILENCE
+ if self._silence_length < len(self._data):
+                    # deliver only if the gathered frames aren't all silent
+ return self._process_end_of_detection()
+ self._data = []
self._silence_length = 0
else:
self._data.append(frame)
self._silence_length += 1
if len(self._data) >= self.max_length:
- self._process_end_of_detection(True)
- # don't reset _silence_length because we still
+ return self._process_end_of_detection(True)
+ # don't reset _silence_length because we still
# need to know the total number of silent frames
-
-
+
def _post_process(self):
if self._state == self.NOISE or self._state == self.POSSIBLE_SILENCE:
if len(self._data) > 0 and len(self._data) > self._silence_length:
- self._process_end_of_detection()
-
-
+ return self._process_end_of_detection()
+
def _process_end_of_detection(self, truncated=False):
-
- if not truncated and self._drop_tailing_silence and self._silence_length > 0:
+
+ if (
+ not truncated
+ and self._drop_trailing_silence
+ and self._silence_length > 0
+ ):
# happens if max_continuous_silence is reached
# or max_length is reached at a silent frame
- self._data = self._data[0: - self._silence_length]
-
- if (len(self._data) >= self.min_length) or \
- (len(self._data) > 0 and \
- not self._strict_min_length and self._contiguous_token):
-
-
-
- _end_frame = self._start_frame + len(self._data) - 1
- self._deliver(self._data, self._start_frame, _end_frame)
-
+ self._data = self._data[0 : -self._silence_length]
+
+ if (len(self._data) >= self.min_length) or (
+ len(self._data) > 0
+ and not self._strict_min_length
+ and self._contiguous_token
+ ):
+
+ start_frame = self._start_frame
+ end_frame = self._start_frame + len(self._data) - 1
+ data = self._data
+ self._data = []
+ token = (data, start_frame, end_frame)
+
if truncated:
# next token (if any) will start at _current_frame + 1
self._start_frame = self._current_frame + 1
@@ -441,12 +1448,11 @@ class StreamTokenizer():
self._contiguous_token = True
else:
self._contiguous_token = False
+ return token
else:
- self._contiguous_token = False
-
+ self._contiguous_token = False
+
self._data = []
-
-
-
+
def _append_token(self, data, start, end):
self._tokens.append((data, start, end))
diff --git a/libs/auditok/dataset.py b/libs/auditok/dataset.py
index 1a3a7af5c..98dc5d1d4 100644
--- a/libs/auditok/dataset.py
+++ b/libs/auditok/dataset.py
@@ -1,19 +1,31 @@
"""
-This module contains links to audio files you can use for test purposes.
+This module contains links to audio files that can be used for test purposes.
+
+.. autosummary::
+ :toctree: generated/
+
+ one_to_six_arabic_16000_mono_bc_noise
+ was_der_mensch_saet_mono_44100_lead_trail_silence
"""
import os
-__all__ = ["one_to_six_arabic_16000_mono_bc_noise", "was_der_mensch_saet_mono_44100_lead_trail_silence"]
+__all__ = [
+ "one_to_six_arabic_16000_mono_bc_noise",
+ "was_der_mensch_saet_mono_44100_lead_trail_silence",
+]
_current_dir = os.path.dirname(os.path.realpath(__file__))
one_to_six_arabic_16000_mono_bc_noise = "{cd}{sep}data{sep}1to6arabic_\
-16000_mono_bc_noise.wav".format(cd=_current_dir, sep=os.path.sep)
+16000_mono_bc_noise.wav".format(
+ cd=_current_dir, sep=os.path.sep
+)
"""A wave file that contains a pronunciation of Arabic numbers from 1 to 6"""
-
was_der_mensch_saet_mono_44100_lead_trail_silence = "{cd}{sep}data{sep}was_\
der_mensch_saet_das_wird_er_vielfach_ernten_44100Hz_mono_lead_trail_\
-silence.wav".format(cd=_current_dir, sep=os.path.sep)
-""" A wave file that contains a sentence between long leading and trailing periods of silence""" \ No newline at end of file
+silence.wav".format(
+ cd=_current_dir, sep=os.path.sep
+)
+"""A wave file that contains a sentence with a long leading and trailing silence"""
diff --git a/libs/auditok/exceptions.py b/libs/auditok/exceptions.py
index 0026a9d89..7bc5054ee 100644
--- a/libs/auditok/exceptions.py
+++ b/libs/auditok/exceptions.py
@@ -1,9 +1,41 @@
-"""
-November 2015
-@author: Amine SEHILI <[email protected]>
-"""
-
class DuplicateArgument(Exception):
pass
+class TooSamllBlockDuration(ValueError):
+ """Raised when block_dur results in a block_size smaller than one sample."""
+
+ def __init__(self, message, block_dur, sampling_rate):
+ self.block_dur = block_dur
+ self.sampling_rate = sampling_rate
+ super(TooSamllBlockDuration, self).__init__(message)
+
+
+class TimeFormatError(Exception):
+ """Raised when a duration formatting directive is unknown."""
+
+
+class EndOfProcessing(Exception):
+ """Raised within command line script's main function to jump to
+ postprocessing code."""
+
+
+class AudioIOError(Exception):
+ """Raised when a compressed audio file cannot be loaded or when trying
+ to read from a not yet open AudioSource"""
+
+
+class AudioParameterError(AudioIOError):
+ """Raised when one audio parameter is missing when loading raw data or
+ saving data to a format other than raw. Also raised when an audio
+ parameter has a wrong value."""
+
+
+class AudioEncodingError(Exception):
+ """Raised if audio data can not be encoded in the provided format"""
+
+
+class AudioEncodingWarning(RuntimeWarning):
+ """Raised if audio data can not be encoded in the provided format
+ but saved as wav.
+ """
diff --git a/libs/auditok/io.py b/libs/auditok/io.py
index 665ab274d..b5fb61a76 100644
--- a/libs/auditok/io.py
+++ b/libs/auditok/io.py
@@ -1,499 +1,1021 @@
"""
Module for low-level audio input-output operations.
-Class summary
-=============
-
.. autosummary::
+ :toctree: generated/
- AudioSource
- Rewindable
- BufferAudioSource
- WaveAudioSource
- PyAudioSource
- StdinAudioSource
- PyAudioPlayer
-
+ AudioSource
+ Rewindable
+ BufferAudioSource
+ WaveAudioSource
+ PyAudioSource
+ StdinAudioSource
+ PyAudioPlayer
+ from_file
+ to_file
+ player_for
+"""
+import os
+import sys
+import wave
+import warnings
+from abc import ABC, abstractmethod
+from functools import partial
+from .exceptions import AudioIOError, AudioParameterError
-Function summary
-================
+try:
+ from pydub import AudioSegment
-.. autosummary::
+ _WITH_PYDUB = True
+except ImportError:
+ _WITH_PYDUB = False
- from_file
- player_for
-"""
+try:
+ from tqdm import tqdm as _tqdm
-from abc import ABCMeta, abstractmethod
-import wave
-import sys
+ DEFAULT_BAR_FORMAT_TQDM = "|" + "{bar}" + "|" + "[{elapsed}/{duration}]"
+ DEFAULT_NCOLS_TQDM = 30
+ DEFAULT_MIN_INTERVAL_TQDM = 0.05
+ _WITH_TQDM = True
+except ImportError:
+ _WITH_TQDM = False
-__all__ = ["AudioSource", "Rewindable", "BufferAudioSource", "WaveAudioSource",
- "PyAudioSource", "StdinAudioSource", "PyAudioPlayer", "from_file", "player_for"]
-DEFAULT_SAMPLE_RATE = 16000
+__all__ = [
+ "AudioSource",
+ "Rewindable",
+ "BufferAudioSource",
+ "RawAudioSource",
+ "WaveAudioSource",
+ "PyAudioSource",
+ "StdinAudioSource",
+ "PyAudioPlayer",
+ "from_file",
+ "to_file",
+ "player_for",
+]
+
+DEFAULT_SAMPLING_RATE = 16000
DEFAULT_SAMPLE_WIDTH = 2
DEFAULT_NB_CHANNELS = 1
-class AudioSource():
- """
+def check_audio_data(data, sample_width, channels):
+ sample_size_bytes = int(sample_width * channels)
+ nb_samples = len(data) // sample_size_bytes
+ if nb_samples * sample_size_bytes != len(data):
+ raise AudioParameterError(
+ "The length of audio data must be an integer "
+ "multiple of `sample_width * channels`"
+ )
+
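# Editor's note: sketch (not part of the diff). check_audio_data only
# verifies that the byte count aligns with whole multi-channel samples.
from auditok.io import check_audio_data

check_audio_data(b"\x00" * 8, sample_width=2, channels=2)   # 2 frames: ok
# check_audio_data(b"\x00" * 7, 2, 2) would raise AudioParameterError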
+
+def _guess_audio_format(fmt, filename):
+ if fmt is None:
+ extension = os.path.splitext(filename.lower())[1][1:]
+ if extension:
+ fmt = extension
+ else:
+ return None
+ fmt = fmt.lower()
+ if fmt == "wave":
+ fmt = "wav"
+ return fmt
+
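# Editor's note: sketch (not part of the diff) of the format-guessing
# precedence implemented above.
from auditok.io import _guess_audio_format

assert _guess_audio_format(None, "rec.WAV") == "wav"    # from extension
assert _guess_audio_format("wave", "rec.dat") == "wav"  # alias normalized
assert _guess_audio_format(None, "rec") is None         # nothing to guess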
+
+def _get_audio_parameters(param_dict):
+ """
+ Get audio parameters from a dictionary of parameters. An audio parameter can
+ have a long name or a short name. If the long name is present, the short
+    name will be ignored. If neither is present, or if a value is not a
+    positive integer, `AudioParameterError` is raised.
+
+ Expected parameters are:
+
+ - `sampling_rate`, `sr` : int, sampling rate.
+
+ - `sample_width`, `sw` : int, sample size in bytes.
+
+ - `channels`, `ch` : int, number of channels.
+
+ Returns
+ -------
+ audio_parameters : tuple
+ a tuple for audio parameters as (sampling_rate, sample_width, channels).
+ """
+ err_message = (
+ "'{ln}' (or '{sn}') must be a positive integer, found: '{val}'"
+ )
+ parameters = []
+ for (long_name, short_name) in (
+ ("sampling_rate", "sr"),
+ ("sample_width", "sw"),
+ ("channels", "ch"),
+ ):
+ param = param_dict.get(long_name, param_dict.get(short_name))
+ if param is None or not isinstance(param, int) or param <= 0:
+ raise AudioParameterError(
+ err_message.format(ln=long_name, sn=short_name, val=param)
+ )
+ parameters.append(param)
+ sampling_rate, sample_width, channels = parameters
+ return sampling_rate, sample_width, channels
+
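# Editor's note: sketch (not part of the diff); long parameter names
# take precedence over their short aliases.
from auditok.io import _get_audio_parameters

params = {"sr": 8000, "sampling_rate": 16000, "sw": 2, "ch": 1}
assert _get_audio_parameters(params) == (16000, 2, 1)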
+
+class AudioSource(ABC):
+ """
Base class for audio source objects.
-
- Subclasses should implement methods to open/close and audio stream
+
+    Subclasses should implement methods to open/close an audio stream
and read the desired amount of audio samples.
-
- :Parameters:
-
- `sampling_rate` : int
- Number of samples per second of audio stream. Default = 16000.
-
- `sample_width` : int
- Size in bytes of one audio sample. Possible values : 1, 2, 4.
- Default = 2.
-
- `channels` : int
- Number of channels of audio stream. The current version supports
- only mono audio streams (i.e. one channel).
- """
-
- __metaclass__ = ABCMeta
-
- def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
- sample_width = DEFAULT_SAMPLE_WIDTH,
- channels = DEFAULT_NB_CHANNELS):
-
- if not sample_width in (1, 2, 4):
- raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
-
- if channels != 1:
- raise ValueError("Only mono audio is currently handled")
-
- self.sampling_rate = sampling_rate
- self.sample_width = sample_width
- self.channels = channels
-
+
+ Parameters
+ ----------
+ sampling_rate : int
+ number of samples per second of audio data.
+ sample_width : int
+ size in bytes of one audio sample. Possible values: 1, 2 or 4.
+ channels : int
+ number of channels of audio data.
+ """
+
+ def __init__(
+ self, sampling_rate, sample_width, channels,
+ ):
+
+ if sample_width not in (1, 2, 4):
+ raise AudioParameterError(
+ "Sample width must be one of: 1, 2 or 4 (bytes)"
+ )
+
+ self._sampling_rate = sampling_rate
+ self._sample_width = sample_width
+ self._channels = channels
+
@abstractmethod
def is_open(self):
- """ Return True if audio source is open, False otherwise """
-
+ """Return True if audio source is open, False otherwise."""
+
@abstractmethod
def open(self):
- """ Open audio source """
-
+ """Open audio source."""
+
@abstractmethod
def close(self):
- """ Close audio source """
-
+ """Close audio source."""
+
@abstractmethod
def read(self, size):
"""
Read and return `size` audio samples at most.
-
- :Parameters:
-
- `size` : int
- the number of samples to read.
-
- :Returns:
-
- Audio data as a string of length 'N' * 'smaple_width' * 'channels', where 'N' is:
-
- - `size` if `size` < 'left_samples'
-
- - 'left_samples' if `size` > 'left_samples'
-
- """
-
- def get_sampling_rate(self):
- """ Return the number of samples per second of audio stream """
- return self.sampling_rate
-
- def get_sample_width(self):
- """ Return the number of bytes used to represent one audio sample """
- return self.sample_width
-
- def get_channels(self):
- """ Return the number of channels of this audio source """
+
+ Parameters
+ -----------
+ size : int
+ Number of samples to read.
+
+ Returns
+ -------
+ data : bytes
+ Audio data as a bytes object of length `N * sample_width * channels`
+ where `N` equals:
+
+ - `size` if `size` <= remaining samples
+
+ - remaining samples if `size` > remaining samples
+ """
+
+ @property
+ def sampling_rate(self):
+ """Number of samples per second of audio stream."""
+ return self._sampling_rate
+
+ @property
+ def sr(self):
+ """Number of samples per second of audio stream (alias for
+        `sampling_rate`)."""
+ return self._sampling_rate
+
+ @property
+ def sample_width(self):
+ """Number of bytes used to represent one audio sample."""
+ return self._sample_width
+
+ @property
+ def sw(self):
+ """Number of bytes used to represent one audio sample (alias for
+ `sample_width`)."""
+ return self._sample_width
+
+ @property
+ def channels(self):
+ """Number of channels in audio stream."""
+ return self._channels
+
+ @property
+ def ch(self):
+ """Number of channels in audio stream (alias for `channels`)."""
return self.channels
-
-class Rewindable():
+class Rewindable(AudioSource):
"""
Base class for rewindable audio streams.
- Subclasses should implement methods to return to the beginning of an
- audio stream as well as method to move to an absolute audio position
- expressed in time or in number of samples.
+
+    Subclasses should implement a method to return to the start of the
+    stream (`rewind`), as well as a property getter/setter named `position`
+    that reads/sets the stream position, expressed in number of samples.
"""
-
- __metaclass__ = ABCMeta
-
+
@abstractmethod
def rewind(self):
- """ Go back to the beginning of audio stream """
- pass
-
- @abstractmethod
- def get_position(self):
- """ Return the total number of already read samples """
-
- @abstractmethod
- def get_time_position(self):
- """ Return the total duration in seconds of already read data """
-
+ """Go back to the beginning of audio stream."""
+
+ @property
@abstractmethod
- def set_position(self, position):
- """ Move to an absolute position
-
- :Parameters:
-
- `position` : int
- number of samples to skip from the start of the stream
- """
-
+ def position(self):
+ """Return stream position in number of samples."""
+
+ @position.setter
@abstractmethod
- def set_time_position(self, time_position):
- """ Move to an absolute position expressed in seconds
-
- :Parameters:
-
- `time_position` : float
- seconds to skip from the start of the stream
- """
- pass
+ def position(self, position):
+ """Set stream position in number of samples."""
+
+ @property
+ def position_s(self):
+ """Return stream position in seconds."""
+ return self.position / self.sampling_rate
-
+ @position_s.setter
+ def position_s(self, position_s):
+ """Set stream position in seconds."""
+ self.position = int(self.sampling_rate * position_s)
-class BufferAudioSource(AudioSource, Rewindable):
+ @property
+ def position_ms(self):
+ """Return stream position in milliseconds."""
+ return (self.position * 1000) // self.sampling_rate
+
+ @position_ms.setter
+ def position_ms(self, position_ms):
+ """Set stream position in milliseconds."""
+ if not isinstance(position_ms, int):
+ raise ValueError("position_ms should be an int")
+ self.position = int(self.sampling_rate * position_ms / 1000)
+
+
+class BufferAudioSource(Rewindable):
"""
- An :class:`AudioSource` that encapsulates and reads data from a memory buffer.
- It implements methods from :class:`Rewindable` and is therefore a navigable :class:`AudioSource`.
+ An `AudioSource` that encapsulates and reads data from a memory buffer.
+
+    This class implements the `Rewindable` interface.
+
+ Parameters
+ ----------
+ data : bytes
+ audio data
+ sampling_rate : int, default: 16000
+ number of samples per second of audio data.
+ sample_width : int, default: 2
+ size in bytes of one audio sample. Possible values: 1, 2 or 4.
+ channels : int, default: 1
+ number of channels of audio data.
"""
-
- def __init__(self, data_buffer,
- sampling_rate = DEFAULT_SAMPLE_RATE,
- sample_width = DEFAULT_SAMPLE_WIDTH,
- channels = DEFAULT_NB_CHANNELS):
-
- if len(data_buffer) % (sample_width * channels) !=0:
- raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
-
+
+ def __init__(
+ self, data, sampling_rate=16000, sample_width=2, channels=1,
+ ):
AudioSource.__init__(self, sampling_rate, sample_width, channels)
- self._buffer = data_buffer
- self._index = 0
- self._left = 0 if self._buffer is None else len(self._buffer)
+ check_audio_data(data, sample_width, channels)
+ self._data = data
+ self._sample_size_all_channels = sample_width * channels
+ self._current_position_bytes = 0
self._is_open = False
-
+
def is_open(self):
return self._is_open
-
+
def open(self):
self._is_open = True
-
+
def close(self):
self._is_open = False
self.rewind()
-
+
def read(self, size):
if not self._is_open:
- raise IOError("Stream is not open")
-
- if self._left > 0:
-
- to_read = size * self.sample_width * self.channels
- if to_read > self._left:
- to_read = self._left
-
- data = self._buffer[self._index: self._index + to_read]
- self._index += to_read
- self._left -= to_read
-
+ raise AudioIOError("Stream is not open")
+ if size is None or size < 0:
+ offset = None
+ else:
+ bytes_to_read = self._sample_size_all_channels * size
+ offset = self._current_position_bytes + bytes_to_read
+ data = self._data[self._current_position_bytes : offset]
+ if data:
+ self._current_position_bytes += len(data)
return data
-
return None
-
- def get_data_buffer(self):
- """ Return all audio data as one string buffer. """
- return self._buffer
-
- def set_data(self, data_buffer):
- """ Set new data for this audio stream.
-
- :Parameters:
-
- `data_buffer` : str, basestring, Bytes
- a string buffer with a length multiple of (sample_width * channels)
- """
- if len(data_buffer) % (self.sample_width * self.channels) !=0:
- raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
- self._buffer = data_buffer
- self._index = 0
- self._left = 0 if self._buffer is None else len(self._buffer)
-
- def append_data(self, data_buffer):
- """ Append data to this audio stream
-
- :Parameters:
-
- `data_buffer` : str, basestring, Bytes
- a buffer with a length multiple of (sample_width * channels)
- """
-
- if len(data_buffer) % (self.sample_width * self.channels) !=0:
- raise ValueError("length of data_buffer must be a multiple of (sample_width * channels)")
-
- self._buffer += data_buffer
- self._left += len(data_buffer)
-
-
+
+ @property
+ def data(self):
+ """Get raw audio data as a `bytes` object."""
+ return self._data
+
def rewind(self):
- self.set_position(0)
-
- def get_position(self):
- return self._index / self.sample_width
-
- def get_time_position(self):
- return float(self._index) / (self.sample_width * self.sampling_rate)
-
- def set_position(self, position):
- if position < 0:
- raise ValueError("position must be >= 0")
-
- if self._buffer is None:
- self._index = 0
- self._left = 0
- return
-
- position *= self.sample_width
- self._index = position if position < len(self._buffer) else len(self._buffer)
- self._left = len(self._buffer) - self._index
+ self.position = 0
+
+ @property
+ def position(self):
+ """Get stream position in number of samples"""
+ return self._current_position_bytes // self._sample_size_all_channels
+ @position.setter
+ def position(self, position):
+ """Set stream position in number of samples."""
+ position *= self._sample_size_all_channels
+ if position < 0:
+ position += len(self.data)
+ if position < 0 or position > len(self.data):
+ raise IndexError("Position out of range")
+ self._current_position_bytes = position
- def set_time_position(self, time_position): # time in seconds
- position = int(self.sampling_rate * time_position)
- self.set_position(position)
+ @property
+ def position_ms(self):
+ """Get stream position in milliseconds."""
+ return (self._current_position_bytes * 1000) // (
+ self._sample_size_all_channels * self.sampling_rate
+ )
+ @position_ms.setter
+ def position_ms(self, position_ms):
+ """Set stream position in milliseconds."""
+ if not isinstance(position_ms, int):
+ raise ValueError("position_ms should be an int")
+ self.position = int(self.sampling_rate * position_ms / 1000)
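# Editor's note: an illustrative sketch (not part of the diff) of
# navigating a BufferAudioSource through the Rewindable interface.
from auditok.io import BufferAudioSource

src = BufferAudioSource(b"\x00" * 32000, sampling_rate=16000,
                        sample_width=2, channels=1)   # 1 s of silence
src.open()
src.position_ms = 500              # jump to the middle
data = src.read(1600)              # read 100 ms (1600 samples)
assert src.position == 8000 + 1600
src.rewind()
assert src.position == 0
src.close()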
-class WaveAudioSource(AudioSource):
+class FileAudioSource(AudioSource):
"""
- A class for an `AudioSource` that reads data from a wave file.
-
- :Parameters:
-
- `filename` :
- path to a valid wave file
+    Base class for `AudioSource`s that read audio data from a file.
+
+ Parameters
+ ----------
+ sampling_rate : int, default: 16000
+ number of samples per second of audio data.
+ sample_width : int, default: 2
+ size in bytes of one audio sample. Possible values: 1, 2 or 4.
+ channels : int, default: 1
+ number of channels of audio data.
"""
-
- def __init__(self, filename):
-
- self._filename = filename
+
+ def __init__(self, sampling_rate, sample_width, channels):
+ AudioSource.__init__(self, sampling_rate, sample_width, channels)
self._audio_stream = None
-
- stream = wave.open(self._filename)
- AudioSource.__init__(self, stream.getframerate(),
- stream.getsampwidth(),
- stream.getnchannels())
- stream.close()
-
-
+
+ def __del__(self):
+ if self.is_open():
+ self.close()
+
def is_open(self):
return self._audio_stream is not None
-
- def open(self):
- if(self._audio_stream is None):
- self._audio_stream = wave.open(self._filename)
-
-
+
def close(self):
if self._audio_stream is not None:
self._audio_stream.close()
self._audio_stream = None
-
-
+
+ @abstractmethod
+ def _read_from_stream(self, size):
+ """Read data from stream"""
+
def read(self, size):
+ if not self.is_open():
+ raise AudioIOError("Audio stream is not open")
+ data = self._read_from_stream(size)
+ if not data:
+ return None
+ return data
+
+
+class RawAudioSource(FileAudioSource):
+ """
+ A class for an `AudioSource` that reads data from a raw (headerless) audio
+ file.
+
+ This class should be used for large raw audio files to avoid loading the
+ whole data to memory.
+
+ Parameters
+ ----------
+    file : str
+        path to a raw audio file.
+ sampling_rate : int
+ Number of samples per second of audio data.
+ sample_width : int
+ Size in bytes of one audio sample. Possible values : 1, 2, 4.
+ channels : int
+ Number of channels of audio data.
+ """
+
+ def __init__(self, file, sampling_rate, sample_width, channels):
+ FileAudioSource.__init__(self, sampling_rate, sample_width, channels)
+ self._file = file
+ self._audio_stream = None
+ self._sample_size = sample_width * channels
+
+ def open(self):
if self._audio_stream is None:
- raise IOError("Stream is not open")
+ self._audio_stream = open(self._file, "rb")
+
+ def _read_from_stream(self, size):
+ if size is None or size < 0:
+ bytes_to_read = None
else:
- data = self._audio_stream.readframes(size)
- if data is None or len(data) < 1:
- return None
- return data
+ bytes_to_read = size * self._sample_size
+ data = self._audio_stream.read(bytes_to_read)
+ return data
+
+
+class WaveAudioSource(FileAudioSource):
+ """
+ A class for an `AudioSource` that reads data from a wave file.
+
+ This class should be used for large wave files to avoid loading the whole
+ data to memory.
+
+ Parameters
+ ----------
+ filename : str
+ path to a valid wave file.
+ """
+
+ def __init__(self, filename):
+ self._filename = filename
+ self._audio_stream = None
+ stream = wave.open(self._filename, "rb")
+ FileAudioSource.__init__(
+ self,
+ stream.getframerate(),
+ stream.getsampwidth(),
+ stream.getnchannels(),
+ )
+ stream.close()
+
+ def open(self):
+ if self._audio_stream is None:
+ self._audio_stream = wave.open(self._filename)
+
+ def _read_from_stream(self, size):
+ if size is None or size < 0:
+ size = -1
+ return self._audio_stream.readframes(size)
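A brief usage sketch for these lazy sources, assuming a large wave file (the file name is hypothetical):

.. code:: python

    from auditok.io import WaveAudioSource

    src = WaveAudioSource("very_large_recording.wav")  # hypothetical file
    src.open()
    while True:
        window = src.read(4096)  # read 4096 samples per call
        if window is None:       # end of stream
            break
        # process `window` (a bytes object) here
    src.close()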
class PyAudioSource(AudioSource):
"""
- A class for an `AudioSource` that reads data the built-in microphone using PyAudio.
+    A class for an `AudioSource` that reads data from the built-in microphone
+    using PyAudio (https://people.csail.mit.edu/hubert/pyaudio/).
+
+ Parameters
+ ----------
+ sampling_rate : int, default: 16000
+ number of samples per second of audio data.
+ sample_width : int, default: 2
+ size in bytes of one audio sample. Possible values: 1, 2 or 4.
+ channels : int, default: 1
+ number of channels of audio data.
+ frames_per_buffer : int, default: 1024
+ PyAudio number of frames per buffer.
+ input_device_index: None or int, default: None
+        PyAudio index of the audio device to read audio data from. If None,
+        the default device is used.
"""
-
- def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
- sample_width = DEFAULT_SAMPLE_WIDTH,
- channels = DEFAULT_NB_CHANNELS,
- frames_per_buffer = 1024):
-
-
+
+ def __init__(
+ self,
+ sampling_rate=16000,
+ sample_width=2,
+ channels=1,
+ frames_per_buffer=1024,
+ input_device_index=None,
+ ):
+
AudioSource.__init__(self, sampling_rate, sample_width, channels)
self._chunk_size = frames_per_buffer
-
+ self.input_device_index = input_device_index
+
import pyaudio
+
self._pyaudio_object = pyaudio.PyAudio()
- self._pyaudio_format = self._pyaudio_object.get_format_from_width(self.sample_width)
+ self._pyaudio_format = self._pyaudio_object.get_format_from_width(
+ self.sample_width
+ )
self._audio_stream = None
-
def is_open(self):
return self._audio_stream is not None
-
+
def open(self):
- self._audio_stream = self._pyaudio_object.open(format = self._pyaudio_format,
- channels = self.channels,
- rate = self.sampling_rate,
- input = True,
- output = False,
- frames_per_buffer = self._chunk_size)
-
-
+ self._audio_stream = self._pyaudio_object.open(
+ format=self._pyaudio_format,
+ channels=self.channels,
+ rate=self.sampling_rate,
+ input=True,
+ output=False,
+ input_device_index=self.input_device_index,
+ frames_per_buffer=self._chunk_size,
+ )
+
def close(self):
if self._audio_stream is not None:
self._audio_stream.stop_stream()
self._audio_stream.close()
self._audio_stream = None
-
-
+
def read(self, size):
if self._audio_stream is None:
raise IOError("Stream is not open")
-
if self._audio_stream.is_active():
data = self._audio_stream.read(size)
if data is None or len(data) < 1:
return None
return data
-
return None
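A short sketch of reading from the microphone with this class, assuming PyAudio is installed:

.. code:: python

    from auditok.io import PyAudioSource

    mic = PyAudioSource(sampling_rate=16000, sample_width=2, channels=1)
    mic.open()
    # 16 buffers of 1024 samples at 16 kHz is roughly one second of audio
    chunks = [mic.read(1024) for _ in range(16)]
    mic.close()
    data = b"".join(chunk for chunk in chunks if chunk is not None)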
-
-class StdinAudioSource(AudioSource):
+
+class StdinAudioSource(FileAudioSource):
"""
- A class for an :class:`AudioSource` that reads data from standard input.
+ A class for an `AudioSource` that reads data from standard input.
+
+ Parameters
+ ----------
+ sampling_rate : int, default: 16000
+ number of samples per second of audio data.
+ sample_width : int, default: 2
+ size in bytes of one audio sample. Possible values: 1, 2 or 4.
+ channels : int, default: 1
+ number of channels of audio data.
"""
-
- def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
- sample_width = DEFAULT_SAMPLE_WIDTH,
- channels = DEFAULT_NB_CHANNELS):
-
- AudioSource.__init__(self, sampling_rate, sample_width, channels)
+
+ def __init__(
+ self, sampling_rate=16000, sample_width=2, channels=1,
+ ):
+ FileAudioSource.__init__(self, sampling_rate, sample_width, channels)
self._is_open = False
-
-
+ self._sample_size = sample_width * channels
+ self._stream = sys.stdin.buffer
+
def is_open(self):
return self._is_open
-
+
def open(self):
self._is_open = True
-
+
def close(self):
self._is_open = False
-
- def read(self, size):
- if not self._is_open:
- raise IOError("Stream is not open")
-
- to_read = size * self.sample_width * self.channels
- data = sys.stdin.read(to_read)
-
- if data is None or len(data) < 1:
- return None
-
- return data
-
-
-class PyAudioPlayer():
+
+ def _read_from_stream(self, size):
+ bytes_to_read = size * self._sample_size
+ data = self._stream.read(bytes_to_read)
+ if data:
+ return data
+ return None
+
+
+def _make_tqdm_progress_bar(iterable, total, duration, **tqdm_kwargs):
+ fmt = tqdm_kwargs.get("bar_format", DEFAULT_BAR_FORMAT_TQDM)
+ fmt = fmt.replace("{duration}", "{:.3f}".format(duration))
+ tqdm_kwargs["bar_format"] = fmt
+
+ tqdm_kwargs["ncols"] = tqdm_kwargs.get("ncols", DEFAULT_NCOLS_TQDM)
+ tqdm_kwargs["mininterval"] = tqdm_kwargs.get(
+ "mininterval", DEFAULT_MIN_INTERVAL_TQDM
+ )
+ return _tqdm(iterable, total=total, **tqdm_kwargs)
+
+
+class PyAudioPlayer:
"""
     A class for audio playback using PyAudio
+ (https://people.csail.mit.edu/hubert/pyaudio/).
+
+ Parameters
+ ----------
+ sampling_rate : int, default: 16000
+ number of samples per second of audio data.
+ sample_width : int, default: 2
+ size in bytes of one audio sample. Possible values: 1, 2 or 4.
+ channels : int, default: 1
+ number of channels of audio data.
"""
-
- def __init__(self, sampling_rate = DEFAULT_SAMPLE_RATE,
- sample_width = DEFAULT_SAMPLE_WIDTH,
- channels = DEFAULT_NB_CHANNELS):
- if not sample_width in (1, 2, 4):
- raise ValueError("Sample width must be one of: 1, 2 or 4 (bytes)")
-
+
+ def __init__(
+ self, sampling_rate=16000, sample_width=2, channels=1,
+ ):
+ if sample_width not in (1, 2, 4):
+ raise ValueError("Sample width in bytes must be one of 1, 2 or 4")
+
self.sampling_rate = sampling_rate
self.sample_width = sample_width
self.channels = channels
-
+
import pyaudio
+
self._p = pyaudio.PyAudio()
- self.stream = self._p.open(format = self._p.get_format_from_width(self.sample_width),
- channels = self.channels, rate = self.sampling_rate,
- input = False, output = True)
-
- def play(self, data):
+ self.stream = self._p.open(
+ format=self._p.get_format_from_width(self.sample_width),
+ channels=self.channels,
+ rate=self.sampling_rate,
+ input=False,
+ output=True,
+ )
+
+ def play(self, data, progress_bar=False, **progress_bar_kwargs):
+ chunk_gen, nb_chunks = self._chunk_data(data)
+ if progress_bar and _WITH_TQDM:
+ duration = len(data) / (
+ self.sampling_rate * self.sample_width * self.channels
+ )
+ chunk_gen = _make_tqdm_progress_bar(
+ chunk_gen,
+ total=nb_chunks,
+ duration=duration,
+ **progress_bar_kwargs
+ )
if self.stream.is_stopped():
self.stream.start_stream()
-
- for chunk in self._chunk_data(data):
- self.stream.write(chunk)
-
+ try:
+ for chunk in chunk_gen:
+ self.stream.write(chunk)
+ except KeyboardInterrupt:
+ pass
self.stream.stop_stream()
-
- def stop(self):
+
+ def stop(self):
if not self.stream.is_stopped():
self.stream.stop_stream()
self.stream.close()
self._p.terminate()
-
+
def _chunk_data(self, data):
# make audio chunks of 100 ms to allow interruption (like ctrl+c)
- chunk_size = int((self.sampling_rate * self.sample_width * self.channels) / 10)
- start = 0
- while start < len(data):
- yield data[start : start + chunk_size]
- start += chunk_size
-
-
-def from_file(filename):
- """
- Create an `AudioSource` object using the audio file specified by `filename`.
- The appropriate :class:`AudioSource` class is guessed from file's extension.
-
- :Parameters:
-
- `filename` :
- path to an audio file.
-
- :Returns:
-
- an `AudioSource` object that reads data from the given file.
-
- """
-
- if filename.lower().endswith(".wav"):
- return WaveAudioSource(filename)
-
- raise Exception("Can not create an AudioSource object from '%s'" %(filename))
-
-
-def player_for(audio_source):
- """
- Return a :class:`PyAudioPlayer` that can play data from `audio_source`.
-
- :Parameters:
-
- `audio_source` :
- an `AudioSource` object.
-
- :Returns:
-
- `PyAudioPlayer` that has the same sampling rate, sample width and number of channels
- as `audio_source`.
- """
-
- return PyAudioPlayer(audio_source.get_sampling_rate(),
- audio_source.get_sample_width(),
- audio_source.get_channels())
-
-
+ bytes_1_sec = self.sampling_rate * self.sample_width * self.channels
+ chunk_size = bytes_1_sec // 10
+ # make sure chunk_size is a multiple of sample_width * channels
+ chunk_size -= chunk_size % (self.sample_width * self.channels)
+ nb_chunks, rest = divmod(len(data), chunk_size)
+ if rest > 0:
+ nb_chunks += 1
+ chunk_gen = (
+ data[i : i + chunk_size] for i in range(0, len(data), chunk_size)
+ )
+ return chunk_gen, nb_chunks
+
+
+def player_for(source):
+ """
+    Return an `AudioPlayer` compatible with `source` (i.e., one that has the
+    same sampling rate, sample width and number of channels).
+
+ Parameters
+ ----------
+ source : AudioSource
+        An object that has `sampling_rate`, `sample_width` and `channels`
+        attributes.
+
+ Returns
+ -------
+ player : PyAudioPlayer
+ An audio player that has the same sampling rate, sample width
+ and number of channels as `source`.
+ """
+ return PyAudioPlayer(
+ source.sampling_rate, source.sample_width, source.channels
+ )
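A usage sketch combining `from_file` and `player_for`, assuming PyAudio is available (the file name is hypothetical):

.. code:: python

    from auditok.io import from_file, player_for

    source = from_file("speech.wav")           # hypothetical file
    player = player_for(source)                # same rate/width/channels
    source.open()
    data = source.read(source.sampling_rate)   # one second of audio
    if data is not None:
        player.play(data)
    player.stop()
    source.close()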
+
+
+def get_audio_source(input=None, **kwargs):
+ """
+ Create and return an AudioSource from input.
+
+ Parameters
+ ----------
+ input : str, bytes, "-" or None (default)
+ source to read audio data from. If `str`, it should be a path to a valid
+ audio file. If `bytes`, it is used as raw audio data. If it is "-",
+ raw data will be read from stdin. If None, read audio data from the
+ microphone using PyAudio.
+ kwargs
+ audio parameters used to build the `AudioSource` object. Depending on
+        the nature of `input`, these may be omitted (e.g., when `input` is an
+ audio file in a popular audio format such as wav, ogg, flac, etc.) or
+ include parameters such as `sampling_rate`, `sample_width`, `channels`
+ (or their respective short name versions `sr`, `sw` and `ch`) if `input`
+ is a path to a raw (headerless) audio file, a bytes object for raw audio
+        data or None (to read data from the built-in microphone). See the
+        respective `AudioSource` classes for more information about possible
+        parameters.
+
+ Returns
+ -------
+ source : AudioSource
+ audio source created from input parameters
+ """
+ if input == "-":
+ return StdinAudioSource(*_get_audio_parameters(kwargs))
+ if isinstance(input, bytes):
+ return BufferAudioSource(input, *_get_audio_parameters(kwargs))
+
+ # read data from a file
+ if input is not None:
+ return from_file(filename=input, **kwargs)
+
+ # read data from microphone via pyaudio
+ else:
+ frames_per_buffer = kwargs.get("frames_per_buffer", 1024)
+ input_device_index = kwargs.get("input_device_index")
+ return PyAudioSource(
+ *_get_audio_parameters(kwargs),
+ frames_per_buffer=frames_per_buffer,
+ input_device_index=input_device_index
+ )
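A sketch of the three kinds of input, assuming the short parameter names `sr`, `sw` and `ch` are accepted as described above (the file name is hypothetical):

.. code:: python

    from auditok.io import get_audio_source

    # from an audio file (format guessed from the extension)
    src = get_audio_source("speech.wav")
    # from raw bytes: audio parameters are required
    src = get_audio_source(b"\x00" * 32000, sr=16000, sw=2, ch=1)
    # from the microphone (requires PyAudio)
    src = get_audio_source(None)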
+
+
+def _load_raw(file, sampling_rate, sample_width, channels, large_file=False):
+ """
+ Load a raw audio file with standard Python. If `large_file` is True, return
+ a `RawAudioSource` object that reads data lazily from disk, otherwise load
+ all data to memory and return a `BufferAudioSource` object.
+
+ Parameters
+ ----------
+ file : str
+ path to a raw audio data file.
+ sampling_rate : int
+ sampling rate of audio data.
+ sample_width : int
+ size in bytes of one audio sample.
+ channels : int
+ number of channels of audio data.
+ large_file : bool
+ if True, return a `RawAudioSource` otherwise a `BufferAudioSource`
+ object.
+
+ Returns
+ -------
+ source : RawAudioSource or BufferAudioSource
+ an `AudioSource` that reads data from input file.
+ """
+ if None in (sampling_rate, sample_width, channels):
+ raise AudioParameterError(
+ "All audio parameters are required for raw audio files"
+ )
+
+ if large_file:
+ return RawAudioSource(
+ file,
+ sampling_rate=sampling_rate,
+ sample_width=sample_width,
+ channels=channels,
+ )
+
+ with open(file, "rb") as fp:
+ data = fp.read()
+ return BufferAudioSource(
+ data,
+ sampling_rate=sampling_rate,
+ sample_width=sample_width,
+ channels=channels,
+ )
+
+
+def _load_wave(file, large_file=False):
+ """
+ Load a wave audio file with standard Python. If `large_file` is True, return
+ a `WaveAudioSource` object that reads data lazily from disk, otherwise load
+ all data to memory and return a `BufferAudioSource` object.
+
+ Parameters
+ ----------
+ file : str
+ path to a wav audio data file
+ large_file : bool
+ if True, return a `WaveAudioSource` otherwise a `BufferAudioSource`
+ object.
+
+ Returns
+ -------
+ source : WaveAudioSource or BufferAudioSource
+ an `AudioSource` that reads data from input file.
+ """
+ if large_file:
+ return WaveAudioSource(file)
+ with wave.open(file) as fp:
+ channels = fp.getnchannels()
+ srate = fp.getframerate()
+ swidth = fp.getsampwidth()
+ data = fp.readframes(-1)
+ return BufferAudioSource(
+ data, sampling_rate=srate, sample_width=swidth, channels=channels
+ )
+
+
+def _load_with_pydub(file, audio_format=None):
+ """
+ Open compressed audio or video file using pydub. If a video file
+ is passed, its audio track(s) are extracted and loaded.
+
+ Parameters
+ ----------
+ file : str
+ path to audio file.
+ audio_format : str, default: None
+ string, audio/video file format if known (e.g. raw, webm, wav, ogg)
+
+ Returns
+ -------
+ source : BufferAudioSource
+ an `AudioSource` that reads data from input file.
+ """
+ func_dict = {
+ "mp3": AudioSegment.from_mp3,
+ "ogg": AudioSegment.from_ogg,
+ "flv": AudioSegment.from_flv,
+ }
+ open_function = func_dict.get(audio_format, AudioSegment.from_file)
+ segment = open_function(file)
+ return BufferAudioSource(
+ data=segment.raw_data,
+ sampling_rate=segment.frame_rate,
+ sample_width=segment.sample_width,
+ channels=segment.channels,
+ )
+
+
+def from_file(filename, audio_format=None, large_file=False, **kwargs):
+ """
+ Read audio data from `filename` and return an `AudioSource` object.
+    If `audio_format` is None, the appropriate `AudioSource` class is guessed
+ from file's extension. `filename` can be a compressed audio or video file.
+ This will require installing `pydub` (https://github.com/jiaaro/pydub).
+
+ The normal behavior is to load all audio data to memory from which a
+ :class:`BufferAudioSource` object is created. This should be convenient
+    most of the time unless the audio file is very large. In that case, and
+    in order to load audio data in a lazy manner (i.e., read data from disk
+    each time :func:`AudioSource.read` is called), `large_file` should be True.
+
+ Note that the current implementation supports only wave and raw formats for
+ lazy audio loading.
+
+    If the audio format is `raw`, the following keyword arguments are required:
+
+ - `sampling_rate`, `sr`: int, sampling rate of audio data.
+ - `sample_width`, `sw`: int, size in bytes of one audio sample.
+ - `channels`, `ch`: int, number of channels of audio data.
+
+ See also
+ --------
+ :func:`to_file`.
+
+ Parameters
+ ----------
+ filename : str
+ path to input audio or video file.
+ audio_format : str
+        audio format of the input file (e.g. raw, webm, wav, ogg).
+ large_file : bool, default: False
+        if True, audio data won't be fully loaded to memory; instead, each
+        window is read from disk when requested.
+
+
+ Other Parameters
+ ----------------
+ sampling_rate, sr: int
+ sampling rate of audio data
+ sample_width : int
+ sample width (i.e. number of bytes used to represent one audio sample)
+ channels : int
+ number of channels of audio data
+
+ Returns
+ -------
+ audio_source : AudioSource
+ an :class:`AudioSource` object that reads data from input file.
+
+ Raises
+ ------
+ `AudioIOError`
+        raised if audio data cannot be read in the given format or if
+        `audio_format` is `raw` and one or more audio parameters are missing.
+ """
+ audio_format = _guess_audio_format(audio_format, filename)
+
+ if audio_format == "raw":
+ srate, swidth, channels = _get_audio_parameters(kwargs)
+ return _load_raw(filename, srate, swidth, channels, large_file)
+
+ if audio_format in ["wav", "wave"]:
+ return _load_wave(filename, large_file)
+ if large_file:
+ err_msg = "if 'large_file` is True file format should be raw or wav"
+ raise AudioIOError(err_msg)
+ if _WITH_PYDUB:
+ return _load_with_pydub(filename, audio_format=audio_format)
+ else:
+ raise AudioIOError(
+ "pydub is required for audio formats other than raw or wav"
+ )
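A hedged sketch of the two loading modes for a headerless file (the file name is hypothetical):

.. code:: python

    from auditok.io import from_file

    # raw files need explicit audio parameters
    src = from_file(
        "capture.raw", sampling_rate=16000, sample_width=2, channels=1
    )
    # very large file: read lazily from disk instead of loading it all
    src = from_file(
        "capture.raw",
        large_file=True,
        sampling_rate=16000,
        sample_width=2,
        channels=1,
    )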
+
+
+def _save_raw(data, file):
+ """
+ Saves audio data as a headerless (i.e. raw) file.
+ See also :func:`to_file`.
+ """
+ with open(file, "wb") as fp:
+ fp.write(data)
+
+
+def _save_wave(data, file, sampling_rate, sample_width, channels):
+ """
+ Saves audio data to a wave file.
+ See also :func:`to_file`.
+ """
+ if None in (sampling_rate, sample_width, channels):
+ raise AudioParameterError(
+ "All audio parameters are required to save wave audio files"
+ )
+ with wave.open(file, "w") as fp:
+ fp.setframerate(sampling_rate)
+ fp.setsampwidth(sample_width)
+ fp.setnchannels(channels)
+ fp.writeframes(data)
+
+
+def _save_with_pydub(
+ data, file, audio_format, sampling_rate, sample_width, channels
+):
+ """
+ Saves audio data with pydub (https://github.com/jiaaro/pydub).
+ See also :func:`to_file`.
+ """
+ segment = AudioSegment(
+ data,
+ frame_rate=sampling_rate,
+ sample_width=sample_width,
+ channels=channels,
+ )
+ with open(file, "wb") as fp:
+ segment.export(fp, format=audio_format)
+
+
+def to_file(data, file, audio_format=None, **kwargs):
+ """
+ Writes audio data to file. If `audio_format` is `None`, output
+ audio format will be guessed from extension. If `audio_format`
+ is `None` and `file` comes without an extension then audio
+ data will be written as a raw audio file.
+
+ Parameters
+ ----------
+ data : bytes-like
+ audio data to be written. Can be a `bytes`, `bytearray`,
+ `memoryview`, `array` or `numpy.ndarray` object.
+ file : str
+ path to output audio file.
+ audio_format : str
+ audio format used to save data (e.g. raw, webm, wav, ogg)
+ kwargs: dict
+ If an audio format other than `raw` is used, the following keyword
+ arguments are required:
+
+ - `sampling_rate`, `sr`: int, sampling rate of audio data.
+ - `sample_width`, `sw`: int, size in bytes of one audio sample.
+ - `channels`, `ch`: int, number of channels of audio data.
+
+ Raises
+ ------
+    `AudioParameterError`
+        raised if output format is different from raw and one or more audio
+        parameters are missing.
+    `AudioIOError`
+        raised if audio data cannot be written in the desired format.
+ """
+ audio_format = _guess_audio_format(audio_format, file)
+ if audio_format in (None, "raw"):
+ _save_raw(data, file)
+ return
+ try:
+ sampling_rate, sample_width, channels = _get_audio_parameters(kwargs)
+ except AudioParameterError as exc:
+ err_message = "All audio parameters are required to save formats "
+ "other than raw. Error detail: {}".format(exc)
+ raise AudioParameterError(err_message)
+ if audio_format in ("wav", "wave"):
+ _save_wave(data, file, sampling_rate, sample_width, channels)
+ elif _WITH_PYDUB:
+ _save_with_pydub(
+ data, file, audio_format, sampling_rate, sample_width, channels
+ )
+ else:
+ err_message = "cannot write file format {} (file name: {})"
+ raise AudioIOError(err_message.format(audio_format, file))
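A short sketch of both output paths, using one second of silence as stand-in data:

.. code:: python

    from auditok.io import to_file

    data = b"\x00" * 32000  # 1 s of 16 kHz, 16-bit mono silence
    # raw output: no audio parameters needed
    to_file(data, "output.raw")
    # wave output: all three parameters are required
    to_file(
        data, "output.wav", sampling_rate=16000, sample_width=2, channels=1
    )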
diff --git a/libs/auditok/plotting.py b/libs/auditok/plotting.py
new file mode 100755
index 000000000..eca5877f4
--- /dev/null
+++ b/libs/auditok/plotting.py
@@ -0,0 +1,150 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+AUDITOK_PLOT_THEME = {
+ "figure": {"facecolor": "#482a36", "alpha": 0.2},
+ "plot": {"facecolor": "#282a36"},
+ "energy_threshold": {
+ "color": "#e31f8f",
+ "linestyle": "--",
+ "linewidth": 1,
+ },
+ "signal": {"color": "#40d970", "linestyle": "-", "linewidth": 1},
+ "detections": {
+ "facecolor": "#777777",
+ "edgecolor": "#ff8c1a",
+ "linewidth": 1,
+ "alpha": 0.75,
+ },
+}
+
+
+def _make_time_axis(nb_samples, sampling_rate):
+ sample_duration = 1 / sampling_rate
+ x = np.linspace(0, sample_duration * (nb_samples - 1), nb_samples)
+ return x
+
+
+def _plot_line(x, y, theme, xlabel=None, ylabel=None, **kwargs):
+ color = theme.get("color", theme.get("c"))
+ ls = theme.get("linestyle", theme.get("ls"))
+ lw = theme.get("linewidth", theme.get("lw"))
+ plt.plot(x, y, c=color, ls=ls, lw=lw, **kwargs)
+ plt.xlabel(xlabel, fontsize=8)
+ plt.ylabel(ylabel, fontsize=8)
+
+
+def _plot_detections(subplot, detections, theme):
+ fc = theme.get("facecolor", theme.get("fc"))
+ ec = theme.get("edgecolor", theme.get("ec"))
+ ls = theme.get("linestyle", theme.get("ls"))
+ lw = theme.get("linewidth", theme.get("lw"))
+ alpha = theme.get("alpha")
+ for (start, end) in detections:
+ subplot.axvspan(start, end, fc=fc, ec=ec, ls=ls, lw=lw, alpha=alpha)
+
+
+def plot(
+ audio_region,
+ scale_signal=True,
+ detections=None,
+ energy_threshold=None,
+ show=True,
+ figsize=None,
+ save_as=None,
+ dpi=120,
+ theme="auditok",
+):
+ y = np.asarray(audio_region)
+ if len(y.shape) == 1:
+ y = y.reshape(1, -1)
+ nb_subplots, nb_samples = y.shape
+ sampling_rate = audio_region.sampling_rate
+ time_axis = _make_time_axis(nb_samples, sampling_rate)
+ if energy_threshold is not None:
+ eth_log10 = energy_threshold * np.log(10) / 10
+ amplitude_threshold = np.sqrt(np.exp(eth_log10))
+ else:
+ amplitude_threshold = None
+ if detections is None:
+ detections = []
+ else:
+        # End of detection corresponds to the end of the last sample, but to
+        # stay compatible with the time axis of signal plotting we want the
+        # end of detection to correspond to the *start* of that last sample.
+ detections = [
+ (start, end - (1 / sampling_rate)) for (start, end) in detections
+ ]
+ if theme == "auditok":
+ theme = AUDITOK_PLOT_THEME
+
+ fig = plt.figure(figsize=figsize, dpi=dpi)
+ fig_theme = theme.get("figure", theme.get("fig", {}))
+ fig_fc = fig_theme.get("facecolor", fig_theme.get("ffc"))
+ fig_alpha = fig_theme.get("alpha", 1)
+ fig.patch.set_facecolor(fig_fc)
+ fig.patch.set_alpha(fig_alpha)
+
+ plot_theme = theme.get("plot", {})
+ plot_fc = plot_theme.get("facecolor", plot_theme.get("pfc"))
+
+ if nb_subplots > 2 and nb_subplots % 2 == 0:
+ nb_rows = nb_subplots // 2
+ nb_columns = 2
+ else:
+ nb_rows = nb_subplots
+ nb_columns = 1
+
+ for sid, samples in enumerate(y, 1):
+ ax = fig.add_subplot(nb_rows, nb_columns, sid)
+ ax.set_facecolor(plot_fc)
+ if scale_signal:
+            std = samples.std()
+            if std > 0:
+                mean = samples.mean()
+                samples = (samples - mean) / std
+ max_ = samples.max()
+ plt.ylim(-1.5 * max_, 1.5 * max_)
+ if amplitude_threshold is not None:
+ if scale_signal and std > 0:
+ amp_th = (amplitude_threshold - mean) / std
+ else:
+ amp_th = amplitude_threshold
+ eth_theme = theme.get("energy_threshold", theme.get("eth", {}))
+ _plot_line(
+ [time_axis[0], time_axis[-1]],
+ [amp_th] * 2,
+ eth_theme,
+ label="Detection threshold",
+ )
+ if sid == 1:
+ legend = plt.legend(
+ ["Detection threshold"],
+ facecolor=fig_fc,
+ framealpha=0.1,
+ bbox_to_anchor=(0.0, 1.15, 1.0, 0.102),
+ loc=2,
+ )
+ legend = plt.gca().add_artist(legend)
+
+ signal_theme = theme.get("signal", {})
+ _plot_line(
+ time_axis,
+ samples,
+ signal_theme,
+ xlabel="Time (seconds)",
+ ylabel="Signal{}".format(" (scaled)" if scale_signal else ""),
+ )
+ detections_theme = theme.get("detections", {})
+ _plot_detections(ax, detections, detections_theme)
+ plt.title("Channel {}".format(sid), fontsize=10)
+
+ plt.xticks(fontsize=8)
+ plt.yticks(fontsize=8)
+ plt.tight_layout()
+
+ if save_as is not None:
+ plt.savefig(save_as, dpi=dpi)
+ if show:
+ plt.show()
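A usage sketch for this new module, assuming `AudioRegion.load` as the loading entry point (the file name and detection intervals are hypothetical):

.. code:: python

    from auditok import AudioRegion
    from auditok.plotting import plot

    region = AudioRegion.load("speech.wav")  # hypothetical file
    plot(
        region,
        energy_threshold=50,                  # drawn as a horizontal line
        detections=[(0.5, 1.2), (2.0, 2.8)],  # (start, end) pairs in seconds
        show=True,
    )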
diff --git a/libs/auditok/signal.py b/libs/auditok/signal.py
new file mode 100644
index 000000000..3f00fb9e5
--- /dev/null
+++ b/libs/auditok/signal.py
@@ -0,0 +1,179 @@
+"""
+Module for basic audio signal processing and array operations.
+
+.. autosummary::
+ :toctree: generated/
+
+ to_array
+ extract_single_channel
+ compute_average_channel
+ compute_average_channel_stereo
+ separate_channels
+ calculate_energy_single_channel
+ calculate_energy_multichannel
+"""
+from array import array as array_
+import audioop
+import math
+
+FORMAT = {1: "b", 2: "h", 4: "i"}
+_EPSILON = 1e-10
+
+
+def to_array(data, sample_width, channels):
+ """Extract individual channels of audio data and return a list of arrays of
+ numeric samples. This will always return a list of `array.array` objects
+ (one per channel) even if audio data is mono.
+
+ Parameters
+ ----------
+ data : bytes
+ raw audio data.
+    sample_width : int
+        size in bytes of one audio sample (one channel considered).
+    channels : int
+        number of channels of audio data.
+
+ Returns
+ -------
+ samples_arrays : list
+ list of arrays of audio samples.
+ """
+ fmt = FORMAT[sample_width]
+ if channels == 1:
+ return [array_(fmt, data)]
+ return separate_channels(data, fmt, channels)
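A small illustration of `to_array` on interleaved stereo data:

.. code:: python

    from array import array
    from auditok.signal import to_array

    # interleaved 16-bit stereo samples: L0 R0 L1 R1
    data = array("h", [100, 200, 300, 400]).tobytes()
    channels = to_array(data, sample_width=2, channels=2)
    # -> [array('h', [100, 300]), array('h', [200, 400])]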
+
+
+def extract_single_channel(data, fmt, channels, selected):
+ samples = array_(fmt, data)
+ return samples[selected::channels]
+
+
+def compute_average_channel(data, fmt, channels):
+ """
+ Compute and return average channel of multi-channel audio data. If the
+ number of channels is 2, use :func:`compute_average_channel_stereo` (much
+    faster). This function uses the standard `array` module to convert `bytes`
+    data into an array of numeric values.
+
+ Parameters
+ ----------
+ data : bytes
+ multi-channel audio data to mix down.
+ fmt : str
+ format (single character) to pass to `array.array` to convert `data`
+ into an array of samples. This should be "b" if audio data's sample width
+ is 1, "h" if it's 2 and "i" if it's 4.
+ channels : int
+ number of channels of audio data.
+
+ Returns
+ -------
+ mono_audio : bytes
+ mixed down audio data.
+ """
+ all_channels = array_(fmt, data)
+ mono_channels = [
+ array_(fmt, all_channels[ch::channels]) for ch in range(channels)
+ ]
+ avg_arr = array_(
+ fmt,
+ (round(sum(samples) / channels) for samples in zip(*mono_channels)),
+ )
+ return avg_arr
+
+
+def compute_average_channel_stereo(data, sample_width):
+ """Compute and return average channel of stereo audio data. This function
+ should be used when the number of channels is exactly 2 because in that
+    case we can use the standard `audioop` module, which is *much* faster than
+    calling :func:`compute_average_channel`.
+
+ Parameters
+ ----------
+ data : bytes
+ 2-channel audio data to mix down.
+ sample_width : int
+ size in bytes of one audio sample (one channel considered).
+
+ Returns
+ -------
+ mono_audio : bytes
+ mixed down audio data.
+ """
+ fmt = FORMAT[sample_width]
+ arr = array_(fmt, audioop.tomono(data, sample_width, 0.5, 0.5))
+ return arr
+
+
+def separate_channels(data, fmt, channels):
+ """Create a list of arrays of audio samples (`array.array` objects), one for
+ each channel.
+
+ Parameters
+ ----------
+    data : bytes
+        multi-channel audio data to separate.
+ fmt : str
+ format (single character) to pass to `array.array` to convert `data`
+ into an array of samples. This should be "b" if audio data's sample width
+ is 1, "h" if it's 2 and "i" if it's 4.
+ channels : int
+ number of channels of audio data.
+
+ Returns
+ -------
+ channels_arr : list
+ list of audio channels, each as a standard `array.array`.
+ """
+ all_channels = array_(fmt, data)
+ mono_channels = [
+ array_(fmt, all_channels[ch::channels]) for ch in range(channels)
+ ]
+ return mono_channels
+
+
+def calculate_energy_single_channel(data, sample_width):
+ """Calculate the energy of mono audio data. Energy is computed as:
+
+    .. math:: energy = 20 \log_{10}\left(\sqrt{\frac{1}{N}\sum_{i=1}^{N}{a_i}^2}\right) % # noqa: W605
+
+ where `a_i` is the i-th audio sample and `N` is the number of audio samples
+ in data.
+
+ Parameters
+ ----------
+ data : bytes
+ single-channel audio data.
+ sample_width : int
+ size in bytes of one audio sample.
+
+ Returns
+ -------
+ energy : float
+ energy of audio signal.
+ """
+ energy_sqrt = max(audioop.rms(data, sample_width), _EPSILON)
+ return 20 * math.log10(energy_sqrt)
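A quick numeric check of this formula: a constant signal of amplitude 1000 has an RMS of exactly 1000, hence an energy of 20 * log10(1000) = 60 dB, while all-zero data is clamped to `_EPSILON` instead of yielding -inf:

.. code:: python

    from array import array
    from auditok.signal import calculate_energy_single_channel

    samples = array("h", [1000] * 160)  # 10 ms of audio at 16 kHz
    energy = calculate_energy_single_channel(samples.tobytes(), 2)
    assert round(energy, 3) == 60.0
    # silence: RMS is clamped to 1e-10, giving 20 * log10(1e-10) = -200.0
    calculate_energy_single_channel(b"\x00" * 320, 2)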
+
+
+def calculate_energy_multichannel(x, sample_width, aggregation_fn=max):
+ """Calculate the energy of multi-channel audio data. Energy is calculated
+ channel-wise. An aggregation function is applied to the resulting energies
+ (default: `max`). Also see :func:`calculate_energy_single_channel`.
+
+ Parameters
+ ----------
+ data : bytes
+ single-channel audio data.
+ sample_width : int
+ size in bytes of one audio sample (one channel considered).
+ aggregation_fn : callable, default: max
+ aggregation function to apply to the resulting per-channel energies.
+
+ Returns
+ -------
+ energy : float
+ aggregated energy of multi-channel audio signal.
+ """
+ energies = (calculate_energy_single_channel(xi, sample_width) for xi in x)
+ return aggregation_fn(energies)
diff --git a/libs/auditok/signal_numpy.py b/libs/auditok/signal_numpy.py
new file mode 100644
index 000000000..bf5425197
--- /dev/null
+++ b/libs/auditok/signal_numpy.py
@@ -0,0 +1,30 @@
+import numpy as np
+from .signal import (
+ compute_average_channel_stereo,
+ calculate_energy_single_channel,
+ calculate_energy_multichannel,
+)
+
+FORMAT = {1: np.int8, 2: np.int16, 4: np.int32}
+
+
+def to_array(data, sample_width, channels):
+ fmt = FORMAT[sample_width]
+ if channels == 1:
+ return np.frombuffer(data, dtype=fmt).astype(np.float64)
+ return separate_channels(data, fmt, channels).astype(np.float64)
+
+
+def extract_single_channel(data, fmt, channels, selected):
+ samples = np.frombuffer(data, dtype=fmt)
+ return np.asanyarray(samples[selected::channels], order="C")
+
+
+def compute_average_channel(data, fmt, channels):
+ array = np.frombuffer(data, dtype=fmt).astype(np.float64)
+ return array.reshape(-1, channels).mean(axis=1).round().astype(fmt)
+
+
+def separate_channels(data, fmt, channels):
+ array = np.frombuffer(data, dtype=fmt)
+ return np.asanyarray(array.reshape(-1, channels).T, order="C")
diff --git a/libs/auditok/util.py b/libs/auditok/util.py
index d46a8899c..f29eb9bf3 100644
--- a/libs/auditok/util.py
+++ b/libs/auditok/util.py
@@ -1,448 +1,624 @@
"""
-Class summary
-=============
-
.. autosummary::
+ :toctree: generated/
- DataSource
- StringDataSource
- ADSFactory
- ADSFactory.AudioDataSource
- ADSFactory.ADSDecorator
- ADSFactory.OverlapADS
- ADSFactory.LimiterADS
- ADSFactory.RecorderADS
- DataValidator
- AudioEnergyValidator
-
+ AudioEnergyValidator
+ AudioReader
+ Recorder
+ make_duration_formatter
+ make_channel_selector
"""
+from abc import ABC, abstractmethod
+import warnings
+from functools import partial
+from .io import (
+ AudioIOError,
+ AudioSource,
+ from_file,
+ BufferAudioSource,
+ PyAudioSource,
+ get_audio_source,
+)
+from .exceptions import (
+ DuplicateArgument,
+ TooSamllBlockDuration,
+ TimeFormatError,
+)
+try:
+ from . import signal_numpy as signal
+except ImportError:
+ from . import signal
-from abc import ABCMeta, abstractmethod
-import math
-from array import array
-from .io import Rewindable, from_file, BufferAudioSource, PyAudioSource
-from .exceptions import DuplicateArgument
-import sys
+__all__ = [
+ "make_duration_formatter",
+ "make_channel_selector",
+ "DataSource",
+ "DataValidator",
+ "StringDataSource",
+ "ADSFactory",
+ "AudioDataSource",
+ "AudioReader",
+ "Recorder",
+ "AudioEnergyValidator",
+]
-try:
- import numpy
- _WITH_NUMPY = True
-except ImportError as e:
- _WITH_NUMPY = False
-
-try:
- from builtins import str
- basestring = str
-except ImportError as e:
- if sys.version_info >= (3, 0):
- basestring = str
-
-
-
-__all__ = ["DataSource", "DataValidator", "StringDataSource", "ADSFactory", "AudioEnergyValidator"]
-
-
-class DataSource():
+
+def make_duration_formatter(fmt):
+ """
+ Make and return a function used to format durations in seconds. Accepted
+ format directives are:
+
+    - ``%S`` : absolute number of seconds with 3 decimals. This directive
+      should be used alone.
+    - ``%I`` : absolute number of milliseconds. This directive should be used
+      alone.
+ - ``%i`` : milliseconds
+ - ``%s`` : seconds
+ - ``%m`` : minutes
+ - ``%h`` : hours
+
+ These last 4 directives should all be specified. They can be placed anywhere
+ in the input string.
+
+ Parameters
+ ----------
+ fmt : str
+ duration format.
+
+ Returns
+ -------
+ formatter : callable
+ a function that takes a duration in seconds (float) and returns a string
+ that corresponds to that duration.
+
+ Raises
+ ------
+ TimeFormatError
+ if the format contains an unknown directive.
+
+ Examples
+ --------
+
+ Using ``%S``:
+
+ .. code:: python
+
+ formatter = make_duration_formatter("%S")
+ formatter(123.589)
+ '123.589'
+ formatter(123)
+ '123.000'
+
+ Using the other directives:
+
+ .. code:: python
+
+ formatter = make_duration_formatter("%h:%m:%s.%i")
+ formatter(3600+120+3.25)
+ '01:02:03.250'
+
+ formatter = make_duration_formatter("%h hrs, %m min, %s sec and %i ms")
+ formatter(3600+120+3.25)
+ '01 hrs, 02 min, 03 sec and 250 ms'
+
+ # omitting one of the 4 directives might result in a wrong duration
+ formatter = make_duration_formatter("%m min, %s sec and %i ms")
+ formatter(3600+120+3.25)
+ '02 min, 03 sec and 250 ms'
+ """
+ if fmt == "%S":
+
+        def formatter(seconds):
+ return "{:.3f}".format(seconds)
+
+ elif fmt == "%I":
+
+        def formatter(seconds):
+ return "{0}".format(int(seconds * 1000))
+
+ else:
+ fmt = fmt.replace("%h", "{hrs:02d}")
+ fmt = fmt.replace("%m", "{mins:02d}")
+ fmt = fmt.replace("%s", "{secs:02d}")
+ fmt = fmt.replace("%i", "{millis:03d}")
+ try:
+ i = fmt.index("%")
+ raise TimeFormatError(
+ "Unknown time format directive '{0}'".format(fmt[i : i + 2])
+ )
+ except ValueError:
+ pass
+
+        def formatter(seconds):
+ millis = int(seconds * 1000)
+ hrs, millis = divmod(millis, 3600000)
+ mins, millis = divmod(millis, 60000)
+ secs, millis = divmod(millis, 1000)
+ return fmt.format(hrs=hrs, mins=mins, secs=secs, millis=millis)
+
+    return formatter
+
+
+def make_channel_selector(sample_width, channels, selected=None):
+ """Create and return a callable used for audio channel selection. The
+    returned selector can be used as `selector(audio_data)` and returns data
+    that contains the selected channel only.
+
+ Importantly, if `selected` is None or equals "any", `selector(audio_data)`
+ will separate and return a list of available channels:
+    `[data_channel_1, data_channel_2, ...]`.
+
+    Note also that the returned `selector` expects `bytes` format for input
+    data but does not necessarily return a `bytes` object. In fact, in order
+    to extract the desired channel (or compute the average channel if
+    `selected` = "avg"), it first converts input data into an `array.array`
+    (or `numpy.ndarray`) object. After the channel of interest is
+    selected/computed, it is returned as such, without any reconversion to
+    `bytes`. This behavior is intentional, for efficiency, as returned objects
+    can be directly used as buffers of bytes. In any case, returned objects
+    can be converted back to `bytes` using `bytes(obj)`.
+
+    The exception to this is the special case where `channels` = 1, in which
+    case input data is returned without any processing.
+
+
+ Parameters
+ ----------
+ sample_width : int
+ number of bytes used to encode one audio sample, should be 1, 2 or 4.
+ channels : int
+ number of channels of raw audio data that the returned selector should
+ expect.
+ selected : int or str, default: None
+ audio channel to select and return when calling `selector(raw_data)`. It
+ should be an int >= `-channels` and < `channels`. If one of "mix",
+ "avg" or "average" is passed then `selector` will return the average
+ channel of audio data. If None or "any", return a list of all available
+ channels at each call.
+
+ Returns
+ -------
+ selector : callable
+        a callable that can be used as `selector(audio_data)` and returns data
+        that contains the channel of interest.
+
+ Raises
+ ------
+ ValueError
+ if `sample_width` is not one of 1, 2 or 4, or if `selected` has an
+ unexpected value.
"""
- Base class for objects passed to :func:`auditok.core.StreamTokenizer.tokenize`.
+ fmt = signal.FORMAT.get(sample_width)
+ if fmt is None:
+ err_msg = "'sample_width' must be 1, 2 or 4, given: {}"
+ raise ValueError(err_msg.format(sample_width))
+ if channels == 1:
+ return lambda x: x
+
+ if isinstance(selected, int):
+ if selected < 0:
+ selected += channels
+ if selected < 0 or selected >= channels:
+ err_msg = "Selected channel must be >= -channels and < channels"
+ err_msg += ", given: {}"
+ raise ValueError(err_msg.format(selected))
+ return partial(
+ signal.extract_single_channel,
+ fmt=fmt,
+ channels=channels,
+ selected=selected,
+ )
+
+ if selected in ("mix", "avg", "average"):
+ if channels == 2:
+ # when data is stereo, using audioop when possible is much faster
+ return partial(
+ signal.compute_average_channel_stereo,
+ sample_width=sample_width,
+ )
+
+ return partial(
+ signal.compute_average_channel, fmt=fmt, channels=channels
+ )
+
+ if selected in (None, "any"):
+ return partial(signal.separate_channels, fmt=fmt, channels=channels)
+
+ raise ValueError(
+ "Selected channel must be an integer, None (alias 'any') or 'average' "
+ "(alias 'avg' or 'mix')"
+ )
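A small sketch of the selector in action on interleaved 16-bit stereo data:

.. code:: python

    from array import array
    from auditok.util import make_channel_selector

    # interleaved 16-bit stereo samples: L0 R0 L1 R1
    data = array("h", [2, 4, 6, 8]).tobytes()

    first = make_channel_selector(sample_width=2, channels=2, selected=0)
    list(first(data))  # -> [2, 6]

    avg = make_channel_selector(sample_width=2, channels=2, selected="avg")
    list(avg(data))    # -> [3, 7], the averages of (2, 4) and (6, 8)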
+
+
+class DataSource(ABC):
+ """
+ Base class for objects passed to :func:`StreamTokenizer.tokenize`.
Subclasses should implement a :func:`DataSource.read` method.
"""
- __metaclass__ = ABCMeta
-
+
@abstractmethod
def read(self):
"""
- Read a piece of data read from this source.
+        Read a block (i.e., window) of data from this source.
If no more data is available, return None.
"""
-
-
-class DataValidator():
+
+
+class DataValidator(ABC):
"""
- Base class for a validator object used by :class:`.core.StreamTokenizer` to check
- if read data is valid.
+ Base class for a validator object used by :class:`.core.StreamTokenizer`
+ to check if read data is valid.
Subclasses should implement :func:`is_valid` method.
"""
- __metaclass__ = ABCMeta
-
+
@abstractmethod
def is_valid(self, data):
"""
Check whether `data` is valid
"""
+
+class AudioEnergyValidator(DataValidator):
+ """
+ A validator based on audio signal energy. For an input window of `N` audio
+ samples (see :func:`AudioEnergyValidator.is_valid`), the energy is computed
+ as:
+
+    .. math:: energy = 20 \log_{10}\left(\sqrt{\frac{1}{N}\sum_{i=1}^{N}{a_i}^2}\right) % # noqa: W605
+
+ where `a_i` is the i-th audio sample.
+
+ Parameters
+ ----------
+ energy_threshold : float
+ minimum energy that audio window should have to be valid.
+ sample_width : int
+ size in bytes of one audio sample.
+ channels : int
+ number of channels of audio data.
+ use_channel : {None, "any", "mix", "avg", "average"} or int
+ channel to use for energy computation. The following values are
+ accepted:
+
+ - None (alias "any") : compute energy for each of the channels and return
+ the maximum value.
+ - "mix" (alias "avg" or "average") : compute the average channel then
+ compute its energy.
+    - int (>= 0, < `channels`) : compute the energy of the specified channel
+        and ignore the other ones.
+ """
+
+ def __init__(
+ self, energy_threshold, sample_width, channels, use_channel=None
+ ):
+ self._sample_width = sample_width
+ self._selector = make_channel_selector(
+ sample_width, channels, use_channel
+ )
+ if channels == 1 or use_channel not in (None, "any"):
+ self._energy_fn = signal.calculate_energy_single_channel
+ else:
+ self._energy_fn = signal.calculate_energy_multichannel
+ self._energy_threshold = energy_threshold
+
+ def is_valid(self, data):
+ """
+
+ Parameters
+ ----------
+ data : bytes-like
+ array of raw audio data
+
+ Returns
+ -------
+ bool
+ True if the energy of audio data is >= threshold, False otherwise.
+ """
+ log_energy = self._energy_fn(self._selector(data), self._sample_width)
+ return log_energy >= self._energy_threshold
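A usage sketch tying the validator to the energy values computed above (a constant amplitude of 1000 gives 60 dB, an amplitude of 10 gives 20 dB):

.. code:: python

    from array import array
    from auditok.util import AudioEnergyValidator

    validator = AudioEnergyValidator(
        energy_threshold=50, sample_width=2, channels=1
    )
    loud = array("h", [1000] * 160).tobytes()  # energy = 60 dB
    quiet = array("h", [10] * 160).tobytes()   # energy = 20 dB
    assert validator.is_valid(loud)
    assert not validator.is_valid(quiet)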
+
+
class StringDataSource(DataSource):
"""
- A class that represent a :class:`DataSource` as a string buffer.
- Each call to :func:`DataSource.read` returns on character and moves one step forward.
- If the end of the buffer is reached, :func:`read` returns None.
-
- :Parameters:
-
- `data` :
- a basestring object.
-
+    Class that represents a :class:`DataSource` as a string buffer.
+    Each call to :func:`DataSource.read` returns one character and moves one
+ step forward. If the end of the buffer is reached, :func:`read` returns
+ None.
+
+ Parameters
+ ----------
+ data : str
+ a string object used as data.
+
"""
-
+
def __init__(self, data):
self._data = None
self._current = 0
self.set_data(data)
-
-
+
def read(self):
"""
Read one character from buffer.
-
- :Returns:
-
- Current character or None if end of buffer is reached
+
+ Returns
+ -------
+ char : str
+ current character or None if end of buffer is reached.
"""
-
+
if self._current >= len(self._data):
return None
self._current += 1
return self._data[self._current - 1]
-
+
def set_data(self, data):
"""
Set a new data buffer.
-
- :Parameters:
-
- `data` : a basestring object
- New data buffer.
+
+ Parameters
+ ----------
+ data : str
+ new data buffer.
"""
-
- if not isinstance(data, basestring):
- raise ValueError("data must an instance of basestring")
+
+ if not isinstance(data, str):
+ raise ValueError("data must an instance of str")
self._data = data
self._current = 0
-
class ADSFactory:
"""
- Factory class that makes it easy to create an :class:`ADSFactory.AudioDataSource` object that implements
- :class:`DataSource` and can therefore be passed to :func:`auditok.core.StreamTokenizer.tokenize`.
-
- Whether you read audio data from a file, the microphone or a memory buffer, this factory
- instantiates and returns the right :class:`ADSFactory.AudioDataSource` object.
-
- There are many other features you want your :class:`ADSFactory.AudioDataSource` object to have, such as:
- memorize all read audio data so that you can rewind and reuse it (especially useful when
- reading data from the microphone), read a fixed amount of data (also useful when reading
- from the microphone), read overlapping audio frames (often needed when dosing a spectral
- analysis of data).
-
- :func:`ADSFactory.ads` automatically creates and return object with the desired behavior according
- to the supplied keyword arguments.
-
+ .. deprecated:: 2.0.0
+ `ADSFactory` will be removed in auditok 2.0.1, use instances of
+ :class:`AudioReader` instead.
+
+ Factory class that makes it easy to create an
+ :class:`AudioDataSource` object that implements
+ :class:`DataSource` and can therefore be passed to
+ :func:`auditok.core.StreamTokenizer.tokenize`.
+
+ Whether you read audio data from a file, the microphone or a memory buffer,
+ this factory instantiates and returns the right
+ :class:`AudioDataSource` object.
+
+    There are many other features you may want an :class:`AudioDataSource`
+    object to have, such as: memorize all read audio data so that you can
+    rewind and reuse it (especially useful when reading data from the
+    microphone), read a fixed amount of data (also useful when reading from
+    the microphone), or read overlapping audio frames (often needed when doing
+    a spectral analysis of data).
+
+    :func:`ADSFactory.ads` automatically creates and returns an object with
+    the desired behavior according to the supplied keyword arguments.
"""
-
- @staticmethod
+
+ @staticmethod # noqa: C901
def _check_normalize_args(kwargs):
-
+
for k in kwargs:
- if not k in ["block_dur", "hop_dur", "block_size", "hop_size", "max_time", "record",
- "audio_source", "filename", "data_buffer", "frames_per_buffer", "sampling_rate",
- "sample_width", "channels", "sr", "sw", "ch", "asrc", "fn", "fpb", "db", "mt",
- "rec", "bd", "hd", "bs", "hs"]:
+ if k not in [
+ "block_dur",
+ "hop_dur",
+ "block_size",
+ "hop_size",
+ "max_time",
+ "record",
+ "audio_source",
+ "filename",
+ "data_buffer",
+ "frames_per_buffer",
+ "sampling_rate",
+ "sample_width",
+ "channels",
+ "sr",
+ "sw",
+ "ch",
+ "asrc",
+ "fn",
+ "fpb",
+ "db",
+ "mt",
+ "rec",
+ "bd",
+ "hd",
+ "bs",
+ "hs",
+ ]:
raise ValueError("Invalid argument: {0}".format(k))
-
+
if "block_dur" in kwargs and "bd" in kwargs:
- raise DuplicateArgument("Either 'block_dur' or 'bd' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'block_dur' or 'bd' must be specified, not both"
+ )
+
if "hop_dur" in kwargs and "hd" in kwargs:
- raise DuplicateArgument("Either 'hop_dur' or 'hd' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'hop_dur' or 'hd' must be specified, not both"
+ )
+
if "block_size" in kwargs and "bs" in kwargs:
- raise DuplicateArgument("Either 'block_size' or 'bs' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'block_size' or 'bs' must be specified, not both"
+ )
+
if "hop_size" in kwargs and "hs" in kwargs:
- raise DuplicateArgument("Either 'hop_size' or 'hs' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'hop_size' or 'hs' must be specified, not both"
+ )
+
if "max_time" in kwargs and "mt" in kwargs:
- raise DuplicateArgument("Either 'max_time' or 'mt' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'max_time' or 'mt' must be specified, not both"
+ )
+
if "audio_source" in kwargs and "asrc" in kwargs:
- raise DuplicateArgument("Either 'audio_source' or 'asrc' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'audio_source' or 'asrc' must be specified, not both"
+ )
+
if "filename" in kwargs and "fn" in kwargs:
- raise DuplicateArgument("Either 'filename' or 'fn' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'filename' or 'fn' must be specified, not both"
+ )
+
if "data_buffer" in kwargs and "db" in kwargs:
- raise DuplicateArgument("Either 'filename' or 'db' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'filename' or 'db' must be specified, not both"
+ )
+
if "frames_per_buffer" in kwargs and "fbb" in kwargs:
- raise DuplicateArgument("Either 'frames_per_buffer' or 'fpb' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'frames_per_buffer' or 'fpb' must be specified, not "
+ "both"
+ )
+
if "sampling_rate" in kwargs and "sr" in kwargs:
- raise DuplicateArgument("Either 'sampling_rate' or 'sr' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'sampling_rate' or 'sr' must be specified, not both"
+ )
+
if "sample_width" in kwargs and "sw" in kwargs:
- raise DuplicateArgument("Either 'sample_width' or 'sw' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'sample_width' or 'sw' must be specified, not both"
+ )
+
if "channels" in kwargs and "ch" in kwargs:
- raise DuplicateArgument("Either 'channels' or 'ch' must be specified, not both")
-
+ raise DuplicateArgument(
+ "Either 'channels' or 'ch' must be specified, not both"
+ )
+
if "record" in kwargs and "rec" in kwargs:
- raise DuplicateArgument("Either 'record' or 'rec' must be specified, not both")
-
-
+ raise DuplicateArgument(
+ "Either 'record' or 'rec' must be specified, not both"
+ )
+
kwargs["bd"] = kwargs.pop("block_dur", None) or kwargs.pop("bd", None)
kwargs["hd"] = kwargs.pop("hop_dur", None) or kwargs.pop("hd", None)
kwargs["bs"] = kwargs.pop("block_size", None) or kwargs.pop("bs", None)
kwargs["hs"] = kwargs.pop("hop_size", None) or kwargs.pop("hs", None)
kwargs["mt"] = kwargs.pop("max_time", None) or kwargs.pop("mt", None)
- kwargs["asrc"] = kwargs.pop("audio_source", None) or kwargs.pop("asrc", None)
+ kwargs["asrc"] = kwargs.pop("audio_source", None) or kwargs.pop(
+ "asrc", None
+ )
kwargs["fn"] = kwargs.pop("filename", None) or kwargs.pop("fn", None)
kwargs["db"] = kwargs.pop("data_buffer", None) or kwargs.pop("db", None)
-
+
record = kwargs.pop("record", False)
if not record:
record = kwargs.pop("rec", False)
if not isinstance(record, bool):
raise TypeError("'record' must be a boolean")
-
+
kwargs["rec"] = record
-
- # keep long names for arguments meant for BufferAudioSource and PyAudioSource
+
+ # keep long names for arguments meant for BufferAudioSource
+ # and PyAudioSource
if "frames_per_buffer" in kwargs or "fpb" in kwargs:
- kwargs["frames_per_buffer"] = kwargs.pop("frames_per_buffer", None) or kwargs.pop("fpb", None)
-
+ kwargs["frames_per_buffer"] = kwargs.pop(
+ "frames_per_buffer", None
+ ) or kwargs.pop("fpb", None)
+
if "sampling_rate" in kwargs or "sr" in kwargs:
- kwargs["sampling_rate"] = kwargs.pop("sampling_rate", None) or kwargs.pop("sr", None)
-
- if "sample_width" in kwargs or "sw" in kwargs:
- kwargs["sample_width"] = kwargs.pop("sample_width", None) or kwargs.pop("sw", None)
-
+ kwargs["sampling_rate"] = kwargs.pop(
+ "sampling_rate", None
+ ) or kwargs.pop("sr", None)
+
+ if "sample_width" in kwargs or "sw" in kwargs:
+ kwargs["sample_width"] = kwargs.pop(
+ "sample_width", None
+ ) or kwargs.pop("sw", None)
+
if "channels" in kwargs or "ch" in kwargs:
- kwargs["channels"] = kwargs.pop("channels", None) or kwargs.pop("ch", None)
-
-
-
-
-
-
-
+ kwargs["channels"] = kwargs.pop("channels", None) or kwargs.pop(
+ "ch", None
+ )
+
@staticmethod
def ads(**kwargs):
-
"""
- Create an return an :class:`ADSFactory.AudioDataSource`. The type and behavior of the object is the result
- of the supplied parameters.
-
- :Parameters:
-
- *No parameters* :
- read audio data from the available built-in microphone with the default parameters.
- The returned :class:`ADSFactory.AudioDataSource` encapsulate an :class:`io.PyAudioSource` object and hence
- it accepts the next four parameters are passed to use instead of their default values.
-
- `sampling_rate`, `sr` : *(int)*
- number of samples per second. Default = 16000.
-
- `sample_width`, `sw` : *(int)*
- number of bytes per sample (must be in (1, 2, 4)). Default = 2
-
- `channels`, `ch` : *(int)*
- number of audio channels. Default = 1 (only this value is currently accepted)
-
- `frames_per_buffer`, `fpb` : *(int)*
- number of samples of PyAudio buffer. Default = 1024.
-
- `audio_source`, `asrc` : an `AudioSource` object
- read data from this audio source
-
- `filename`, `fn` : *(string)*
- build an `io.AudioSource` object using this file (currently only wave format is supported)
-
- `data_buffer`, `db` : *(string)*
- build an `io.BufferAudioSource` using data in `data_buffer`. If this keyword is used,
- `sampling_rate`, `sample_width` and `channels` are passed to `io.BufferAudioSource`
- constructor and used instead of default values.
-
- `max_time`, `mt` : *(float)*
- maximum time (in seconds) to read. Default behavior: read until there is no more data
- available.
-
- `record`, `rec` : *(bool)*
- save all read data in cache. Provide a navigable object which boasts a `rewind` method.
- Default = False.
-
- `block_dur`, `bd` : *(float)*
- processing block duration in seconds. This represents the quantity of audio data to return
- each time the :func:`read` method is invoked. If `block_dur` is 0.025 (i.e. 25 ms) and the sampling
- rate is 8000 and the sample width is 2 bytes, :func:`read` returns a buffer of 0.025 * 8000 * 2 = 400
- bytes at most. This parameter will be looked for (and used if available) before `block_size`.
- If neither parameter is given, `block_dur` will be set to 0.01 second (i.e. 10 ms)
-
-
- `hop_dur`, `hd` : *(float)*
- quantity of data to skip from current processing window. if `hop_dur` is supplied then there
- will be an overlap of `block_dur` - `hop_dur` between two adjacent blocks. This
- parameter will be looked for (and used if available) before `hop_size`. If neither parameter
- is given, `hop_dur` will be set to `block_dur` which means that there will be no overlap
- between two consecutively read blocks.
-
- `block_size`, `bs` : *(int)*
- number of samples to read each time the `read` method is called. Default: a block size
- that represents a window of 10ms, so for a sampling rate of 16000, the default `block_size`
- is 160 samples, for a rate of 44100, `block_size` = 441 samples, etc.
-
- `hop_size`, `hs` : *(int)*
- determines the number of overlapping samples between two adjacent read windows. For a
- `hop_size` of value *N*, the overlap is `block_size` - *N*. Default : `hop_size` = `block_size`,
- means that there is no overlap.
-
- :Returns:
-
- An AudioDataSource object that has the desired features.
-
- :Exampels:
-
- 1. **Create an AudioDataSource that reads data from the microphone (requires Pyaudio) with default audio parameters:**
-
- .. code:: python
-
- from auditok import ADSFactory
- ads = ADSFactory.ads()
- ads.get_sampling_rate()
- 16000
- ads.get_sample_width()
- 2
- ads.get_channels()
- 1
-
-
- 2. **Create an AudioDataSource that reads data from the microphone with a sampling rate of 48KHz:**
-
- .. code:: python
-
- from auditok import ADSFactory
- ads = ADSFactory.ads(sr=48000)
- ads.get_sampling_rate()
- 48000
-
- 3. **Create an AudioDataSource that reads data from a wave file:**
-
- .. code:: python
-
- import auditok
- from auditok import ADSFactory
- ads = ADSFactory.ads(fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
- ads.get_sampling_rate()
- 44100
- ads.get_sample_width()
- 2
- ads.get_channels()
- 1
-
- 4. **Define size of read blocks as 20 ms**
-
- .. code:: python
-
- import auditok
- from auditok import ADSFactory
- '''
- we know samling rate for previous file is 44100 samples/second
- so 10 ms are equivalent to 441 samples and 20 ms to 882
- '''
- block_size = 882
- ads = ADSFactory.ads(bs = 882, fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
- ads.open()
- # read one block
- data = ads.read()
- ads.close()
- len(data)
- 1764
- assert len(data) == ads.get_sample_width() * block_size
-
- 5. **Define block size as a duration (use block_dur or bd):**
-
- .. code:: python
-
- import auditok
- from auditok import ADSFactory
- dur = 0.25 # second
- ads = ADSFactory.ads(bd = dur, fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
- '''
- we know samling rate for previous file is 44100 samples/second
- for a block duration of 250 ms, block size should be 0.25 * 44100 = 11025
- '''
- ads.get_block_size()
- 11025
- assert ads.get_block_size() == int(0.25 * 44100)
- ads.open()
- # read one block
- data = ads.read()
- ads.close()
- len(data)
- 22050
- assert len(data) == ads.get_sample_width() * ads.get_block_size()
-
- 6. **Read overlapping blocks (one of hope_size, hs, hop_dur or hd > 0):**
-
- For better readability we'd better use :class:`auditok.io.BufferAudioSource` with a string buffer:
-
- .. code:: python
-
- import auditok
- from auditok import ADSFactory
- '''
- we supply a data buffer instead of a file (keyword 'data_buffer' or 'db')
- sr : sampling rate = 16 samples/sec
- sw : sample width = 1 byte
- ch : channels = 1
- '''
- buffer = "abcdefghijklmnop" # 16 bytes = 1 second of data
- bd = 0.250 # block duration = 250 ms = 4 bytes
- hd = 0.125 # hop duration = 125 ms = 2 bytes
- ads = ADSFactory.ads(db = "abcdefghijklmnop", bd = bd, hd = hd, sr = 16, sw = 1, ch = 1)
- ads.open()
- ads.read()
- 'abcd'
- ads.read()
- 'cdef'
- ads.read()
- 'efgh'
- ads.read()
- 'ghij'
- data = ads.read()
- assert data == 'ijkl'
-
- 7. **Limit amount of read data (use max_time or mt):**
-
- .. code:: python
-
- '''
- We know audio file is larger than 2.25 seconds
- We want to read up to 2.25 seconds of audio data
- '''
- ads = ADSFactory.ads(mt = 2.25, fn=auditok.dataset.was_der_mensch_saet_mono_44100_lead_trail_silence)
- ads.open()
- data = []
- while True:
- d = ads.read()
- if d is None:
- break
- data.append(d)
-
- ads.close()
- data = b''.join(data)
- assert len(data) == int(ads.get_sampling_rate() * 2.25 * ads.get_sample_width() * ads.get_channels())
+ Create and return an :class:`AudioDataSource`. The type and
+ behavior of the returned object depend on the supplied parameters.
+ Called without any parameters, the factory returns an object that
+ reads audio data from the built-in microphone with the default
+ parameters.
+
+ Parameters
+ ----------
+ sampling_rate, sr : int, default: 16000
+ number of audio samples per second of input audio stream.
+ sample_width, sw : int, default: 2
+ number of bytes per sample, must be one of 1, 2 or 4
+ channels, ch : int, default: 1
+ number of audio channels, only a value of 1 is currently accepted.
+ frames_per_buffer, fpb : int, default: 1024
+ number of samples of the PyAudio buffer.
+ audio_source, asrc : `AudioSource`
+ `AudioSource` to read data from
+ filename, fn : str
+ create an `AudioSource` object using this file
+ data_buffer, db : str
+ build an `io.BufferAudioSource` using data in `data_buffer`.
+ If this keyword is used,
+ `sampling_rate`, `sample_width` and `channels` are passed to
+ `io.BufferAudioSource` constructor and used instead of default
+ values.
+ max_time, mt : float
+ maximum time (in seconds) to read. Default behavior: read until
+ there is no more data
+ available.
+ record, rec : bool, default: False
+ whether to cache all read data. If True, the returned object is
+ navigable (it has a `rewind` method).
+ block_dur, bd : float
+ processing block duration in seconds. This represents the quantity
+ of audio data to return each time the :func:`read` method is
+ invoked. If `block_dur` is 0.025 (i.e. 25 ms) and the sampling rate
+ is 8000 and the sample width is 2 bytes, :func:`read` returns a
+ buffer of 0.025 * 8000 * 2 = 400 bytes at most. This parameter will
+ be looked for (and used if available) before `block_size`. If
+ neither parameter is given, `block_dur` will be set to 0.01 second
+ (i.e. 10 ms)
+ hop_dur, hd : float
+ quantity of data to skip from the current processing window. If
+ `hop_dur` is supplied then there will be an overlap of `block_dur`
+ - `hop_dur` between two adjacent blocks. This parameter will be
+ looked for (and used if available) before `hop_size`.
+ If neither parameter is given, `hop_dur` will be set to `block_dur`
+ which means that there will be no overlap between two consecutively
+ read blocks.
+ block_size, bs : int
+ number of samples to read each time the `read` method is called.
+ Default: a block size that represents a window of 10ms, so for a
+ sampling rate of 16000, the default `block_size` is 160 samples,
+ for a rate of 44100, `block_size` = 441 samples, etc.
+ hop_size, hs : int
+ determines the number of overlapping samples between two adjacent
+ read windows. For a `hop_size` of value *N*, the overlap is
+ `block_size` - *N*. Default : `hop_size` = `block_size`, means that
+ there is no overlap.
+
+ Returns
+ -------
+ audio_data_source : AudioDataSource
+ an `AudioDataSource` object built from the input parameters.
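+
+ Examples
+ --------
+ A minimal sketch of this deprecated API (the file path below is a
+ placeholder; prefer :class:`AudioReader` in new code):
+
+ .. code:: python
+
+     from auditok.util import ADSFactory
+
+     # 20 ms blocks from a wav file (hypothetical path)
+     ads = ADSFactory.ads(fn="audio.wav", bd=0.02)
+     ads.open()
+     block = ads.read()  # bytes, or None at end of stream
+     ads.close()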
"""
-
- # copy user's dictionary (shallow copy)
- kwargs = kwargs.copy()
-
+ warnings.warn(
+ "'ADSFactory' is deprecated and will be removed in a future "
+ "release. Please use AudioReader class instead.",
+ DeprecationWarning,
+ )
+
# check and normalize keyword arguments
ADSFactory._check_normalize_args(kwargs)
-
+
block_dur = kwargs.pop("bd")
hop_dur = kwargs.pop("hd")
block_size = kwargs.pop("bs")
@@ -452,431 +628,483 @@ class ADSFactory:
filename = kwargs.pop("fn")
data_buffer = kwargs.pop("db")
record = kwargs.pop("rec")
-
+
# Case 1: an audio source is supplied
if audio_source is not None:
if (filename, data_buffer) != (None, None):
- raise Warning("You should provide one of 'audio_source', 'filename' or 'data_buffer'\
- keyword parameters. 'audio_source' will be used")
-
+ raise Warning(
+ "You should provide one of 'audio_source', 'filename' or \
+ 'data_buffer' keyword parameters. 'audio_source' will be \
+ used"
+ )
+
# Case 2: a file name is supplied
elif filename is not None:
if data_buffer is not None:
- raise Warning("You should provide one of 'filename' or 'data_buffer'\
- keyword parameters. 'filename' will be used")
+ raise Warning(
+ "You should provide one of 'filename' or 'data_buffer'\
+ keyword parameters. 'filename' will be used"
+ )
audio_source = from_file(filename)
-
- # Case 3: a data_buffer is supplied
+
+ # Case 3: a data_buffer is supplied
elif data_buffer is not None:
- audio_source = BufferAudioSource(data_buffer = data_buffer, **kwargs)
-
+ audio_source = BufferAudioSource(data=data_buffer, **kwargs)
+
# Case 4: try to access native audio input
else:
audio_source = PyAudioSource(**kwargs)
-
-
+
if block_dur is not None:
if block_size is not None:
- raise DuplicateArgument("Either 'block_dur' or 'block_size' can be specified, not both")
- else:
- block_size = int(audio_source.get_sampling_rate() * block_dur)
- elif block_size is None:
- # Set default block_size to 10 ms
- block_size = int(audio_source.get_sampling_rate() / 100)
-
- # Instantiate base AudioDataSource
- ads = ADSFactory.AudioDataSource(audio_source=audio_source, block_size=block_size)
-
- # Limit data to be read
- if max_time is not None:
- ads = ADSFactory.LimiterADS(ads=ads, max_time=max_time)
-
- # Record, rewind and reuse data
- if record:
- ads = ADSFactory.RecorderADS(ads=ads)
-
+ raise DuplicateArgument(
+ "Either 'block_dur' or 'block_size' can be specified, not \
+ both"
+ )
+ elif block_size is not None:
+ block_dur = block_size / audio_source.sr
+ else:
+ block_dur = 0.01 # 10 ms
+
# Read overlapping blocks of data
if hop_dur is not None:
if hop_size is not None:
- raise DuplicateArgument("Either 'hop_dur' or 'hop_size' can be specified, not both")
- else:
- hop_size = int(audio_source.get_sampling_rate() * hop_dur)
-
- if hop_size is not None:
- if hop_size <= 0 or hop_size > block_size:
- raise ValueError("hop_size must be > 0 and <= block_size")
- if hop_size < block_size:
- ads = ADSFactory.OverlapADS(ads=ads, hop_size=hop_size)
-
+ raise DuplicateArgument(
+ "Either 'hop_dur' or 'hop_size' can be specified, not both"
+ )
+ elif hop_size is not None:
+ hop_dur = hop_size / audio_source.sr
+
+ ads = AudioDataSource(
+ audio_source,
+ block_dur=block_dur,
+ hop_dur=hop_dur,
+ record=record,
+ max_read=max_time,
+ )
return ads
-
-
- class AudioDataSource(DataSource):
- """
- Base class for AudioDataSource objects.
- It inherits from DataSource and encapsulates an AudioSource object.
- """
-
- def __init__(self, audio_source, block_size):
-
- self.audio_source = audio_source
- self.block_size = block_size
-
- def get_block_size(self):
- return self.block_size
-
- def set_block_size(self, size):
- self.block_size = size
-
- def get_audio_source(self):
- return self.audio_source
-
- def set_audio_source(self, audio_source):
- self.audio_source = audio_source
-
- def open(self):
- self.audio_source.open()
-
- def close(self):
- self.audio_source.close()
-
- def is_open(self):
- return self.audio_source.is_open()
-
- def get_sampling_rate(self):
- return self.audio_source.get_sampling_rate()
-
- def get_sample_width(self):
- return self.audio_source.get_sample_width()
-
- def get_channels(self):
- return self.audio_source.get_channels()
-
-
- def rewind(self):
- if isinstance(self.audio_source, Rewindable):
- self.audio_source.rewind()
- else:
- raise Exception("Audio source is not rewindable")
-
-
-
- def is_rewindable(self):
- return isinstance(self.audio_source, Rewindable)
-
-
- def read(self):
- return self.audio_source.read(self.block_size)
-
-
- class ADSDecorator(AudioDataSource):
- """
- Base decorator class for AudioDataSource objects.
- """
- __metaclass__ = ABCMeta
-
- def __init__(self, ads):
- self.ads = ads
-
- self.get_block_size = self.ads.get_block_size
- self.set_block_size = self.ads.set_block_size
- self.get_audio_source = self.ads.get_audio_source
- self.open = self.ads.open
- self.close = self.ads.close
- self.is_open = self.ads.is_open
- self.get_sampling_rate = self.ads.get_sampling_rate
- self.get_sample_width = self.ads.get_sample_width
- self.get_channels = self.ads.get_channels
-
- def is_rewindable(self):
- return self.ads.is_rewindable
-
- def rewind(self):
- self.ads.rewind()
- self._reinit()
-
- def set_audio_source(self, audio_source):
- self.ads.set_audio_source(audio_source)
- self._reinit()
-
- def open(self):
- if not self.ads.is_open():
- self.ads.open()
- self._reinit()
-
- @abstractmethod
- def _reinit(self):
- pass
-
-
- class OverlapADS(ADSDecorator):
- """
- A class for AudioDataSource objects that can read and return overlapping audio frames
- """
-
- def __init__(self, ads, hop_size):
- ADSFactory.ADSDecorator.__init__(self, ads)
-
- if hop_size <= 0 or hop_size > self.get_block_size():
- raise ValueError("hop_size must be either 'None' or \
- between 1 and block_size (both inclusive)")
- self.hop_size = hop_size
- self._actual_block_size = self.get_block_size()
- self._reinit()
-
-
- def _get_block_size():
- return self._actual_block_size
-
-
- def _read_first_block(self):
- # For the first call, we need an entire block of size 'block_size'
- block = self.ads.read()
- if block is None:
- return None
-
- # Keep a slice of data in cache and append it in the next call
- if len(block) > self._hop_size_bytes:
- self._cache = block[self._hop_size_bytes:]
-
- # Up from the next call, we will use '_read_next_blocks'
- # and we only read 'hop_size'
- self.ads.set_block_size(self.hop_size)
- self.read = self._read_next_blocks
-
- return block
-
- def _read_next_blocks(self):
- block = self.ads.read()
- if block is None:
- return None
-
- # Append block to cache data to ensure overlap
- block = self._cache + block
- # Keep a slice of data in cache only if we have a full length block
- # if we don't that means that this is the last block
- if len(block) == self._block_size_bytes:
- self._cache = block[self._hop_size_bytes:]
- else:
- self._cache = None
-
- return block
- def read(self):
- pass
-
- def _reinit(self):
+
+class _AudioReadingProxy:
+ def __init__(self, audio_source):
+
+ self._audio_source = audio_source
+
+ def rewind(self):
+ if self.rewindable:
+ self._audio_source.rewind()
+ else:
+ raise AudioIOError("Audio stream is not rewindable")
+
+ def rewindable(self):
+ try:
+ return self._audio_source.rewindable
+ except AttributeError:
+ return False
+
+ def is_open(self):
+ return self._audio_source.is_open()
+
+ def open(self):
+ self._audio_source.open()
+
+ def close(self):
+ self._audio_source.close()
+
+ def read(self, size):
+ return self._audio_source.read(size)
+
+ @property
+ def data(self):
+ err_msg = "This AudioReader is not a recorder, no recorded data can "
+ err_msg += "be retrieved"
+ raise AttributeError(err_msg)
+
+ def __getattr__(self, name):
+ return getattr(self._audio_source, name)
+
+
+class _Recorder(_AudioReadingProxy):
+ """
+ Class for `AudioReader` objects that can record all data they read. Useful
+ when reading data from the microphone.
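+
+ An illustrative sketch (private helper; normally reached through
+ ``AudioReader(..., record=True)``):
+
+ .. code:: python
+
+     src = BufferAudioSource(b"0123456789abcdef", 16000, 2, 1)
+     rec = _Recorder(src)
+     rec.open()
+     first = rec.read(4)       # 4 samples = 8 bytes
+     rec.rewind()              # from now on, reads come from the cache
+     assert rec.read(4) == first
+     assert rec.data == first  # only the data read so far was cached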
+ """
+
+ def __init__(self, audio_source):
+ super(_Recorder, self).__init__(audio_source)
+ self._cache = []
+ self._read_block = self._read_and_cache
+ self._read_from_cache = False
+ self._data = None
+
+ def read(self, size):
+ return self._read_block(size)
+
+ @property
+ def data(self):
+ if self._data is None:
+ err_msg = "Unrewinded recorder. `rewind` should be called before "
+ err_msg += "accessing recorded data"
+ raise RuntimeError(err_msg)
+ return self._data
+
+ def rewindable(self):
+ return True
+
+ def rewind(self):
+ if self._read_from_cache:
+ self._audio_source.rewind()
+ else:
+ self._data = b"".join(self._cache)
self._cache = None
- self.ads.set_block_size(self._actual_block_size)
- self._hop_size_bytes = self.hop_size * \
- self.get_sample_width() * \
- self.get_channels()
- self._block_size_bytes = self.get_block_size() * \
- self.get_sample_width() * \
- self.get_channels()
- self.read = self._read_first_block
+ self._audio_source = BufferAudioSource(
+ self._data, self.sr, self.sw, self.ch
+ )
+ self._read_block = self._audio_source.read
+ self.open()
+ self._read_from_cache = True
+
+ def _read_and_cache(self, size):
+ # Read and save read data
+ block = self._audio_source.read(size)
+ if block is not None:
+ self._cache.append(block)
+ return block
+
- class LimiterADS(ADSDecorator):
- """
- A class for AudioDataSource objects that can read a fixed amount of data.
- This can be useful when reading data from the microphone or from large audio files.
- """
-
- def __init__(self, ads, max_time):
- ADSFactory.ADSDecorator.__init__(self, ads)
-
- self.max_time = max_time
- self._reinit()
-
- def read(self):
- if self._total_read_bytes >= self._max_read_bytes:
- return None
- block = self.ads.read()
- if block is None:
- return None
- self._total_read_bytes += len(block)
-
- if self._total_read_bytes >= self._max_read_bytes:
- self.close()
-
- return block
-
-
- def _reinit(self):
- self._max_read_bytes = int(self.max_time * self.get_sampling_rate()) * \
- self.get_sample_width() * \
- self.get_channels()
- self._total_read_bytes = 0
+class _Limiter(_AudioReadingProxy):
+ """
+ Class for `AudioReader` objects that can read a fixed amount of data.
+ This can be useful when reading data from the microphone or from large
+ audio files.
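+
+ For example (illustrative), limiting a 1-second buffer to 0.5 s:
+
+ .. code:: python
+
+     src = BufferAudioSource(b"ab" * 16000, 16000, 2, 1)  # 1 s of audio
+     limited = _Limiter(src, max_read=0.5)
+     limited.open()
+     block = limited.read(16000)       # asks for 1 s, gets 0.5 s
+     assert len(block) == 8000 * 2     # 8000 samples, 2 bytes each
+     assert limited.read(1) is None    # limit reached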
+ """
-
+ def __init__(self, audio_source, max_read):
+ super(_Limiter, self).__init__(audio_source)
+ self._max_read = max_read
+ self._max_samples = round(max_read * self.sr)
+ self._bytes_per_sample = self.sw * self.ch
+ self._read_samples = 0
- class RecorderADS(ADSDecorator):
- """
- A class for AudioDataSource objects that can record all audio data they read,
- with a rewind facility.
- """
-
- def __init__(self, ads):
- ADSFactory.ADSDecorator.__init__(self, ads)
-
- self._reinit()
-
- def read(self):
- pass
-
- def _read_and_rec(self):
- # Read and save read data
- block = self.ads.read()
- if block is not None:
- self._cache.append(block)
-
+ @property
+ def data(self):
+ data = self._audio_source.data
+ max_read_bytes = self._max_samples * self._bytes_per_sample
+ return data[:max_read_bytes]
+
+ @property
+ def max_read(self):
+ return self._max_read
+
+ def read(self, size):
+ size = min(self._max_samples - self._read_samples, size)
+ if size <= 0:
+ return None
+ block = self._audio_source.read(size)
+ if block is None:
+ return None
+ self._read_samples += len(block) // self._bytes_per_sample
+ return block
+
+ def rewind(self):
+ super(_Limiter, self).rewind()
+ self._read_samples = 0
+
+
+class _FixedSizeAudioReader(_AudioReadingProxy):
+ """
+ Class to read fixed-size audio windows from source.
+ """
+
+ def __init__(self, audio_source, block_dur):
+ super(_FixedSizeAudioReader, self).__init__(audio_source)
+
+ if block_dur <= 0:
+ raise ValueError(
+ "block_dur must be > 0, given: {}".format(block_dur)
+ )
+
+ self._block_size = int(block_dur * self.sr)
+ if self._block_size == 0:
+ err_msg = "Too small block_dur ({0:f}) for sampling rate ({1}). "
+ err_msg += "block_dur should cover at least one sample "
+ err_msg += "(i.e. 1/{1})"
+ raise TooSamllBlockDuration(
+ err_msg.format(block_dur, self.sr), block_dur, self.sr
+ )
+
+ def read(self):
+ return self._audio_source.read(self._block_size)
+
+ @property
+ def block_size(self):
+ return self._block_size
+
+ @property
+ def block_dur(self):
+ return self._block_size / self.sr
+
+ def __getattr__(self, name):
+ return getattr(self._audio_source, name)
+
+
+class _OverlapAudioReader(_FixedSizeAudioReader):
+ """
+ Class for `AudioReader` objects that can read and return overlapping audio
+ windows.
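+
+ A sketch adapted from the old `ADSFactory` examples, using the public
+ :class:`AudioReader` wrapper defined below: with a 16-byte buffer at
+ sr=16, sw=1 and ch=1, `block_dur=0.25` (4 samples) and `hop_dur=0.125`
+ (2 samples) produce 4-byte windows overlapping by 2 bytes:
+
+ .. code:: python
+
+     reader = AudioReader(b"abcdefghijklmnop", block_dur=0.25,
+                          hop_dur=0.125, sr=16, sw=1, ch=1)
+     reader.open()
+     assert reader.read() == b"abcd"
+     assert reader.read() == b"cdef"
+     assert reader.read() == b"efgh"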
+ """
+
+ def __init__(self, audio_source, block_dur, hop_dur):
+
+ if hop_dur >= block_dur:
+ raise ValueError('"hop_dur" should be < "block_dur"')
+
+ super(_OverlapAudioReader, self).__init__(audio_source, block_dur)
+
+ self._hop_size = int(hop_dur * self.sr)
+ self._blocks = self._iter_blocks_with_overlap()
+
+ def _iter_blocks_with_overlap(self):
+ while not self.is_open():
+ yield AudioIOError
+ block = self._audio_source.read(self._block_size)
+ if block is None:
+ yield None
+
+ _hop_size_bytes = (
+ self._hop_size * self._audio_source.sw * self._audio_source.ch
+ )
+ cache = block[_hop_size_bytes:]
+ yield block
+
+ while True:
+ block = self._audio_source.read(self._hop_size)
+ if block:
+ block = cache + block
+ cache = block[_hop_size_bytes:]
+ yield block
+ continue
+ yield None
+
+ def read(self):
+ try:
+ block = next(self._blocks)
+ if block == AudioIOError:
+ raise AudioIOError("Audio Stream is not open.")
return block
-
-
- def _read_simple(self):
- # Read without recording
- return self.ads.read()
-
- def rewind(self):
- if self._record:
- # If has been recording, create a new BufferAudioSource
- # from recorded data
- dbuffer = self._concatenate(self._cache)
- asource = BufferAudioSource(dbuffer, self.get_sampling_rate(),
- self.get_sample_width(),
- self.get_channels())
-
-
- self.set_audio_source(asource)
- self.open()
- self._cache = []
- self._record = False
- self.read = self._read_simple
-
- else:
- self.ads.rewind()
- if not self.is_open():
- self.open()
-
-
- def is_rewindable(self):
- return True
-
- def _reinit(self):
- # when audio_source is replaced, start recording again
- self._record = True
- self._cache = []
- self.read = self._read_and_rec
-
- def _concatenate(self, data):
- try:
- # should always work for python 2
- # work for python 3 ONLY if data is a list (or an iterator)
- # whose each element is a 'bytes' objects
- return b''.join(data)
- except TypeError:
- # work for 'str' in python 2 and python 3
- return ''.join(data)
+ except StopIteration:
+ return None
+
+ def rewind(self):
+ super(_OverlapAudioReader, self).rewind()
+ self._blocks = self._iter_blocks_with_overlap()
+
-class AudioEnergyValidator(DataValidator):
+ @property
+ def hop_size(self):
+ return self._hop_size
+
+ @property
+ def hop_dur(self):
+ return self._hop_size / self.sr
+
+ def __getattr__(self, name):
+ return getattr(self._audio_source, name)
+
+
+class AudioReader(DataSource):
"""
- The most basic auditok audio frame validator.
- This validator computes the log energy of an input audio frame
- and return True if the result is >= a given threshold, False
- otherwise.
-
- :Parameters:
-
- `sample_width` : *(int)*
- Number of bytes of one audio sample. This is used to convert data from `basestring` or `Bytes` to
- an array of floats.
-
- `energy_threshold` : *(float)*
- A threshold used to check whether an input data buffer is valid.
+ Class to read fixed-size chunks of audio data from a source. A source
+ can be a file on disk, standard input (with `input` = "-") or the
+ microphone. This is normally used by tokenization algorithms that
+ expect source objects with a `read` function that returns windows of
+ data of the same size at each call, except when the remaining data
+ does not make up a full window.
+
+ Objects of this class can be set up to return audio windows with a given
+ overlap and to record the whole stream for later access (useful when
+ reading data from the microphone). They can also have
+ a limit for the maximum amount of data to read.
+
+ Parameters
+ ----------
+ input : str, bytes, AudioSource, AudioReader, AudioRegion or None
+ input audio data. If the type of the passed argument is `str`, it
+ should be a path to an existing audio file. "-" is interpreted as
+ standard input.
+ If the type is `bytes`, input is considered as a buffer of raw audio
+ data. If None, read audio from microphone. Every object that is not an
+ :class:`AudioReader` will be transformed, when possible, into an
+ :class:`AudioSource` before processing. If it is a `str` that refers
+ to a raw audio file, `bytes` or None, audio parameters should be
+ provided using kwargs (i.e., `sampling_rate`, `sample_width` and
+ `channels` or their aliases).
+ block_dur: float, default: 0.01
+ length in seconds of audio windows to return at each `read` call.
+ hop_dur: float, default: None
+ length in seconds of the amount of data to skip from the previous
+ window. If defined, it is used to compute the temporal overlap
+ between the previous and current window (namely
+ `overlap = block_dur - hop_dur`). The default, None, means that
+ consecutive windows do not overlap.
+ record: bool, default: False
+ whether to record read audio data for later access. If True, audio
+ data can be retrieved by first calling `rewind()`, then using the
+ `data` property. Note that once `rewind()` is called, no new data is
+ read from the source (subsequent `read()` calls return data from the
+ cache) and there is no need to call `rewind()` again to access the
+ `data` property.
+ max_read: float, default: None
+ maximum amount of audio data to read, in seconds. The default, None,
+ means that data is read until the end of the stream is reached or,
+ when reading from the microphone, until a Ctrl-C is sent.
+
+ When `input` is None, a bytes object or a raw audio file, some of the
+ following kwargs are mandatory.
+
+ Other Parameters
+ ----------------
+ audio_format, fmt : str
+ type of audio data (e.g., wav, ogg, flac, raw, etc.). This will only be
+ used if `input` is a string path to an audio file. If not given, audio
+ type will be guessed from file name extension or from file header.
+ sampling_rate, sr : int
+ sampling rate of audio data. Required if `input` is a raw audio file, is
+ a bytes object or None (i.e., read from microphone).
+ sample_width, sw : int
+ number of bytes used to encode one audio sample, typically 1, 2 or 4.
+ Required for raw data, see `sampling_rate`.
+ channels, ch : int
+ number of channels of audio data. Required for raw data, see
+ `sampling_rate`.
+ use_channel, uc : {None, "any", "mix", "avg", "average"} or int
+ which channel to use for split if `input` has multiple audio channels.
+ Regardless of which channel is used for splitting, returned audio events
+ contain data from *all* the channels of `input`. The following values
+ are accepted:
+
+ - None (alias "any"): accept audio activity from any channel, even if
+ other channels are silent. This is the default behavior.
+
+ - "mix" (alias "avg" or "average"): mix down all channels (i.e., compute
+ average channel) and split the resulting channel.
+
+ - int (>= 0, < `channels`): use one channel, specified by its integer
+ id, for split.
+
+ large_file : bool, default: False
+ If True, AND if `input` is a path to a *wav* or a *raw* audio file
+ (and only these two formats), then audio data is lazily loaded into
+ memory (i.e., one analysis window at a time). Otherwise the whole
+ file is loaded into memory before splitting. Set to True if the size
+ of the file is larger than available memory.
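+
+ Examples
+ --------
+ A minimal usage sketch (the file name is a placeholder):
+
+ .. code:: python
+
+     reader = AudioReader("audio.wav", block_dur=0.02, record=True)
+     reader.open()
+     while True:
+         window = reader.read()  # 20 ms of data, None at the end
+         if window is None:
+             break
+     reader.rewind()             # required before accessing `data`
+     whole_signal = reader.data
+     reader.close()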
+ """
+
+ def __init__(
+ self,
+ input,
+ block_dur=0.01,
+ hop_dur=None,
+ record=False,
+ max_read=None,
+ **kwargs
+ ):
+ if not isinstance(input, AudioSource):
+ input = get_audio_source(input, **kwargs)
+ self._record = record
+ if record:
+ input = _Recorder(input)
+ if max_read is not None:
+ input = _Limiter(input, max_read)
+ self._max_read = max_read
+ if hop_dur is not None:
+ input = _OverlapAudioReader(input, block_dur, hop_dur)
+ else:
+ input = _FixedSizeAudioReader(input, block_dur)
+ self._audio_source = input
+
+ def __repr__(self):
+ block_dur, hop_dur, max_read = None, None, None
+ if self.block_dur is not None:
+ block_dur = "{:.3f}".format(self.block_dur)
+ if self.hop_dur is not None:
+ hop_dur = "{:.3f}".format(self.hop_dur)
+ if self.max_read is not None:
+ max_read = "{:.3f}".format(self.max_read)
+ return (
+ "{cls}(block_dur={block_dur}, "
+ "hop_dur={hop_dur}, record={rewindable}, "
+ "max_read={max_read})"
+ ).format(
+ cls=self.__class__.__name__,
+ block_dur=block_dur,
+ hop_dur=hop_dur,
+ rewindable=self._record,
+ max_read=max_read,
+ )
+
+ @property
+ def rewindable(self):
+ return self._record
+
+ @property
+ def block_dur(self):
+ return self._audio_source.block_size / self._audio_source.sr
+
+ @property
+ def hop_dur(self):
+ if hasattr(self._audio_source, "hop_dur"):
+ return self._audio_source.hop_size / self._audio_source.sr
+ return self.block_dur
+
+ @property
+ def hop_size(self):
+ if hasattr(self._audio_source, "hop_size"):
+ return self._audio_source.hop_size
+ return self.block_size
+
+ @property
+ def max_read(self):
+ try:
+ return self._audio_source.max_read
+ except AttributeError:
+ return None
+
+ def read(self):
+ return self._audio_source.read()
+
+ def __getattr__(self, name):
+ if name in ("data", "rewind") and not self.rewindable:
+ raise AttributeError(
+ "'AudioReader' has no attribute '{}'".format(name)
+ )
+ try:
+ return getattr(self._audio_source, name)
+ except AttributeError:
+ raise AttributeError(
+ "'AudioReader' has no attribute '{}'".format(name)
+ )
+
+
+# Keep AudioDataSource for compatibility
+# Remove in a future version when ADSFactory is removed
+AudioDataSource = AudioReader
+
+
+class Recorder(AudioReader):
+ """Class to read fixed-size chunks of audio data from a source and keeps
+ data in a cache. Using this class is equivalent to initializing
+ :class:`AudioReader` with `record=True`. For more information about the
+ other parameters see :class:`AudioReader`.
+
+ Once the desired amount of data is read, you can call the :func:`rewind`
+ method then get the recorded data via the :attr:`data` attribute. You can also
+ re-read cached data one window a time by calling :func:`read`.
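+
+ A minimal sketch (microphone input; the audio parameters are
+ illustrative):
+
+ .. code:: python
+
+     rec = Recorder(input=None, max_read=5, sr=16000, sw=2, ch=1)
+     rec.open()
+     while rec.read() is not None:  # stops after 5 s (max_read)
+         pass
+     rec.rewind()
+     recorded_bytes = rec.data
+     rec.close()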
"""
-
-
- if _WITH_NUMPY:
-
- _formats = {1: numpy.int8 , 2: numpy.int16, 4: numpy.int32}
-
- @staticmethod
- def _convert(signal, sample_width):
- return numpy.array(numpy.frombuffer(signal, dtype=AudioEnergyValidator._formats[sample_width]), dtype=numpy.float64)
-
- @staticmethod
- def _signal_energy(signal):
- return float(numpy.dot(signal, signal)) / len(signal)
-
- @staticmethod
- def _signal_log_energy(signal):
- energy = AudioEnergyValidator._signal_energy(signal)
- if energy <= 0:
- return -200
- return 10. * numpy.log10(energy)
-
- else:
-
-
- _formats = {1: 'b' , 2: 'h', 4: 'i'}
-
- @staticmethod
- def _convert(signal, sample_width):
- return array("d", array(AudioEnergyValidator._formats[sample_width], signal))
-
- @staticmethod
- def _signal_energy(signal):
- energy = 0.
- for a in signal:
- energy += a * a
- return energy / len(signal)
-
- @staticmethod
- def _signal_log_energy(signal):
- energy = AudioEnergyValidator._signal_energy(signal)
- if energy <= 0:
- return -200
- return 10. * math.log10(energy)
-
-
- def __init__(self, sample_width, energy_threshold=45):
- self.sample_width = sample_width
- self._energy_threshold = energy_threshold
-
-
- def is_valid(self, data):
- """
- Check if data is valid. Audio data will be converted into an array (of
- signed values) of which the log energy is computed. Log energy is computed
- as follows:
-
- .. code:: python
-
- arr = AudioEnergyValidator._convert(signal, sample_width)
- energy = float(numpy.dot(arr, arr)) / len(arr)
- log_energy = 10. * numpy.log10(energy)
-
-
- :Parameters:
-
- `data` : either a *string* or a *Bytes* buffer
- `data` is converted into a numerical array using the `sample_width`
- given in the constructor.
-
- :Returns:
-
- True if `log_energy` >= `energy_threshold`, False otherwise.
- """
-
- signal = AudioEnergyValidator._convert(data, self.sample_width)
- return AudioEnergyValidator._signal_log_energy(signal) >= self._energy_threshold
-
- def get_energy_threshold(self):
- return self._energy_threshold
-
- def set_energy_threshold(self, threshold):
- self._energy_threshold = threshold
+ def __init__(
+ self, input, block_dur=0.01, hop_dur=None, max_read=None, **kwargs
+ ):
+ super().__init__(
+ input,
+ block_dur=block_dur,
+ hop_dur=hop_dur,
+ record=True,
+ max_read=max_read,
+ **kwargs
+ )
diff --git a/libs/auditok/workers.py b/libs/auditok/workers.py
new file mode 100755
index 000000000..bb6d54a98
--- /dev/null
+++ b/libs/auditok/workers.py
@@ -0,0 +1,427 @@
+import os
+import sys
+from tempfile import NamedTemporaryFile
+from abc import ABCMeta, abstractmethod
+from threading import Thread
+from datetime import datetime, timedelta
+from collections import namedtuple
+import wave
+import subprocess
+from queue import Queue, Empty
+from .io import _guess_audio_format
+from .util import AudioDataSource, make_duration_formatter
+from .core import split
+from .exceptions import (
+ EndOfProcessing,
+ AudioEncodingError,
+ AudioEncodingWarning,
+)
+
+
+_STOP_PROCESSING = "STOP_PROCESSING"
+_Detection = namedtuple("_Detection", "id start end duration")
+
+
+def _run_subprocess(command):
+ try:
+ with subprocess.Popen(
+ command,
+ stdin=open(os.devnull, "rb"),
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ ) as proc:
+ stdout, stderr = proc.communicate()
+ return proc.returncode, stdout, stderr
+ except Exception:
+ err_msg = "Couldn't export audio using command: '{}'".format(command)
+ raise AudioEncodingError(err_msg)
+
+
+class Worker(Thread, metaclass=ABCMeta):
+ def __init__(self, timeout=0.5, logger=None):
+ self._timeout = timeout
+ self._logger = logger
+ self._inbox = Queue()
+ Thread.__init__(self)
+
+ def run(self):
+ while True:
+ message = self._get_message()
+ if message == _STOP_PROCESSING:
+ break
+ if message is not None:
+ self._process_message(message)
+ self._post_process()
+
+ @abstractmethod
+ def _process_message(self, message):
+ """Process incoming messages"""
+
+ def _post_process(self):
+ pass
+
+ def _log(self, message):
+ self._logger.info(message)
+
+ def _stop_requested(self):
+ try:
+ message = self._inbox.get_nowait()
+ if message == _STOP_PROCESSING:
+ return True
+ except Empty:
+ return False
+
+ def stop(self):
+ self.send(_STOP_PROCESSING)
+ self.join()
+
+ def send(self, message):
+ self._inbox.put(message)
+
+ def _get_message(self):
+ try:
+ message = self._inbox.get(timeout=self._timeout)
+ return message
+ except Empty:
+ return None
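+
+# A minimal sketch of the Worker protocol (illustrative, not part of
+# the public API): subclasses implement `_process_message` and receive
+# messages asynchronously through `send`:
+#
+#     class EchoWorker(Worker):
+#         def _process_message(self, message):
+#             print("got:", message)
+#
+#     worker = EchoWorker()
+#     worker.start()
+#     worker.send("hello")  # handled by the worker thread
+#     worker.stop()         # posts _STOP_PROCESSING and joins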
+
+
+class TokenizerWorker(Worker, AudioDataSource):
+ def __init__(self, reader, observers=None, logger=None, **kwargs):
+ self._observers = observers if observers is not None else []
+ self._reader = reader
+ self._audio_region_gen = split(self, **kwargs)
+ self._detections = []
+ self._log_format = "[DET]: Detection {0.id} (start: {0.start:.3f}, "
+ self._log_format += "end: {0.end:.3f}, duration: {0.duration:.3f})"
+ Worker.__init__(self, timeout=0.2, logger=logger)
+
+ def _process_message(self, message):
+ pass
+
+ @property
+ def detections(self):
+ return self._detections
+
+ def _notify_observers(self, message):
+ for observer in self._observers:
+ observer.send(message)
+
+ def run(self):
+ self._reader.open()
+ start_processing_timestamp = datetime.now()
+ for _id, audio_region in enumerate(self._audio_region_gen, start=1):
+ timestamp = start_processing_timestamp + timedelta(
+ seconds=audio_region.meta.start
+ )
+ audio_region.meta.timestamp = timestamp
+ detection = _Detection(
+ _id,
+ audio_region.meta.start,
+ audio_region.meta.end,
+ audio_region.duration,
+ )
+ self._detections.append(detection)
+ if self._logger is not None:
+ message = self._log_format.format(detection)
+ self._log(message)
+ self._notify_observers((_id, audio_region))
+ self._notify_observers(_STOP_PROCESSING)
+ self._reader.close()
+
+ def start_all(self):
+ for observer in self._observers:
+ observer.start()
+ self.start()
+
+ def stop_all(self):
+ self.stop()
+ for observer in self._observers:
+ observer.stop()
+ self._reader.close()
+
+ def read(self):
+ if self._stop_requested():
+ return None
+ else:
+ return self._reader.read()
+
+ def __getattr__(self, name):
+ return getattr(self._reader, name)
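+
+# Illustrative wiring of a TokenizerWorker (the file name and the
+# `split` keyword arguments such as `min_dur` are assumptions):
+#
+#     reader = AudioReader("audio.wav", block_dur=0.01)
+#     printer = PrintWorker()
+#     tokenizer = TokenizerWorker(reader, observers=[printer], min_dur=0.2)
+#     tokenizer.start_all()  # starts observers, then the tokenizer
+#     tokenizer.join()       # wait until the stream is exhausted
+#     tokenizer.stop_all()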
+
+
+class StreamSaverWorker(Worker):
+ def __init__(
+ self,
+ audio_reader,
+ filename,
+ export_format=None,
+ cache_size_sec=0.5,
+ timeout=0.2,
+ ):
+ self._reader = audio_reader
+ sample_size_bytes = self._reader.sw * self._reader.ch
+ self._cache_size = cache_size_sec * self._reader.sr * sample_size_bytes
+ self._output_filename = filename
+ self._export_format = _guess_audio_format(export_format, filename)
+ if self._export_format is None:
+ self._export_format = "wav"
+ self._init_output_stream()
+ self._exported = False
+ self._cache = []
+ self._total_cached = 0
+ Worker.__init__(self, timeout=timeout)
+
+ def _get_non_existent_filename(self):
+ filename = self._output_filename + ".wav"
+ i = 0
+ while os.path.exists(filename):
+ i += 1
+ filename = self._output_filename + "({}).wav".format(i)
+ return filename
+
+ def _init_output_stream(self):
+ if self._export_format != "wav":
+ self._tmp_output_filename = self._get_non_existent_filename()
+ else:
+ self._tmp_output_filename = self._output_filename
+ self._wfp = wave.open(self._tmp_output_filename, "wb")
+ self._wfp.setframerate(self._reader.sr)
+ self._wfp.setsampwidth(self._reader.sw)
+ self._wfp.setnchannels(self._reader.ch)
+
+ @property
+ def sr(self):
+ return self._reader.sampling_rate
+
+ @property
+ def sw(self):
+ return self._reader.sample_width
+
+ @property
+ def ch(self):
+ return self._reader.channels
+
+ def __del__(self):
+ self._post_process()
+
+ if (
+ (self._tmp_output_filename != self._output_filename)
+ and self._exported
+ and os.path.exists(self._tmp_output_filename)
+ ):
+ os.remove(self._tmp_output_filename)
+
+ def _process_message(self, data):
+ self._cache.append(data)
+ self._total_cached += len(data)
+ if self._total_cached >= self._cache_size:
+ self._write_cached_data()
+
+ def _post_process(self):
+ while True:
+ try:
+ data = self._inbox.get_nowait()
+ if data != _STOP_PROCESSING:
+ self._cache.append(data)
+ self._total_cached += len(data)
+ except Empty:
+ break
+ self._write_cached_data()
+ self._wfp.close()
+
+ def _write_cached_data(self):
+ if self._cache:
+ data = b"".join(self._cache)
+ self._wfp.writeframes(data)
+ self._cache = []
+ self._total_cached = 0
+
+ def open(self):
+ self._reader.open()
+
+ def close(self):
+ self._reader.close()
+ self.stop()
+
+ def rewind(self):
+ # ensure compatibility with AudioDataSource with record=True
+ pass
+
+ @property
+ def data(self):
+ with wave.open(self._tmp_output_filename, "rb") as wfp:
+ return wfp.readframes(-1)
+
+ def save_stream(self):
+ if self._exported:
+ return self._output_filename
+
+ if self._export_format in ("raw", "wav"):
+ if self._export_format == "raw":
+ self._export_raw()
+ self._exported = True
+ return self._output_filename
+ try:
+ self._export_with_ffmpeg_or_avconv()
+ except AudioEncodingError:
+ try:
+ self._export_with_sox()
+ except AudioEncodingError:
+ warn_msg = "Couldn't save audio data in the desired format "
+ warn_msg += "'{}'. Either none of 'ffmpeg', 'avconv' or 'sox' "
+ warn_msg += "is installed or this format is not recognized.\n"
+ warn_msg += "Audio file was saved as '{}'"
+ raise AudioEncodingWarning(
+ warn_msg.format(
+ self._export_format, self._tmp_output_filename
+ )
+ )
+ finally:
+ self._exported = True
+ return self._output_filename
+
+ def _export_raw(self):
+ with open(self._output_filename, "wb") as wfp:
+ wfp.write(self.data)
+
+ def _export_with_ffmpeg_or_avconv(self):
+ command = [
+ "-y",
+ "-f",
+ "wav",
+ "-i",
+ self._tmp_output_filename,
+ "-f",
+ self._export_format,
+ self._output_filename,
+ ]
+ returncode, stdout, stderr = _run_subprocess(["ffmpeg"] + command)
+ if returncode != 0:
+ returncode, stdout, stderr = _run_subprocess(["avconv"] + command)
+ if returncode != 0:
+ raise AudioEncodingError(stderr)
+ return stdout, stderr
+
+ def _export_with_sox(self):
+ command = [
+ "sox",
+ "-t",
+ "wav",
+ self._tmp_output_filename,
+ self._output_filename,
+ ]
+ returncode, stdout, stderr = _run_subprocess(command)
+ if returncode != 0:
+ raise AudioEncodingError(stderr)
+ return stdout, stderr
+
+ def close_output(self):
+ self._wfp.close()
+
+ def read(self):
+ data = self._reader.read()
+ if data is not None:
+ self.send(data)
+ else:
+ self.send(_STOP_PROCESSING)
+ return data
+
+ def __getattr__(self, name):
+ if name == "data":
+ return self.data
+ return getattr(self._reader, name)
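+
+# Illustrative use of StreamSaverWorker (the output name and format are
+# assumptions; non-wav export requires ffmpeg, avconv or sox):
+#
+#     reader = StreamSaverWorker(AudioReader(None, sr=16000, sw=2, ch=1),
+#                                "stream.ogg")
+#     reader.start()
+#     ...  # use `reader` like any AudioReader; each read() also
+#     ...  # enqueues the data for the writer thread
+#     reader.close()
+#     reader.save_stream()  # converts the temporary wav to ogg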
+
+
+class PlayerWorker(Worker):
+ def __init__(self, player, progress_bar=False, timeout=0.2, logger=None):
+ self._player = player
+ self._progress_bar = progress_bar
+ self._log_format = "[PLAY]: Detection {id} played"
+ Worker.__init__(self, timeout=timeout, logger=logger)
+
+ def _process_message(self, message):
+ _id, audio_region = message
+ if self._logger is not None:
+ message = self._log_format.format(id=_id)
+ self._log(message)
+ audio_region.play(
+ player=self._player, progress_bar=self._progress_bar, leave=False
+ )
+
+
+class RegionSaverWorker(Worker):
+ def __init__(
+ self,
+ filename_format,
+ audio_format=None,
+ timeout=0.2,
+ logger=None,
+ **audio_parameters
+ ):
+ self._filename_format = filename_format
+ self._audio_format = audio_format
+ self._audio_parameters = audio_parameters
+ self._debug_format = "[SAVE]: Detection {id} saved as '{filename}'"
+ Worker.__init__(self, timeout=timeout, logger=logger)
+
+ def _process_message(self, message):
+ _id, audio_region = message
+ filename = self._filename_format.format(
+ id=_id,
+ start=audio_region.meta.start,
+ end=audio_region.meta.end,
+ duration=audio_region.duration,
+ )
+ filename = audio_region.save(
+ filename, self._audio_format, **self._audio_parameters
+ )
+ if self._logger:
+ message = self._debug_format.format(id=_id, filename=filename)
+ self._log(message)
+
+
+class CommandLineWorker(Worker):
+ def __init__(self, command, timeout=0.2, logger=None):
+ self._command = command
+ Worker.__init__(self, timeout=timeout, logger=logger)
+ self._debug_format = "[COMMAND]: Detection {id} command: '{command}'"
+
+ def _process_message(self, message):
+ _id, audio_region = message
+ with NamedTemporaryFile(delete=False) as file:
+ filename = audio_region.save(file.name, audio_format="wav")
+ command = self._command.format(file=filename)
+ os.system(command)
+ if self._logger is not None:
+ message = self._debug_format.format(id=_id, command=command)
+ self._log(message)
+
+
+class PrintWorker(Worker):
+ def __init__(
+ self,
+ print_format="{start} {end}",
+ time_format="%S",
+ timestamp_format="%Y/%m/%d %H:%M:%S.%f",
+ timeout=0.2,
+ ):
+
+ self._print_format = print_format
+ self._format_time = make_duration_formatter(time_format)
+ self._timestamp_format = timestamp_format
+ self.detections = []
+ Worker.__init__(self, timeout=timeout)
+
+ def _process_message(self, message):
+ _id, audio_region = message
+ timestamp = audio_region.meta.timestamp
+ timestamp = timestamp.strftime(self._timestamp_format)
+ text = self._print_format.format(
+ id=_id,
+ start=self._format_time(audio_region.meta.start),
+ end=self._format_time(audio_region.meta.end),
+ duration=self._format_time(audio_region.duration),
+ timestamp=timestamp,
+ )
+ print(text)