author    | morpheus65535 <[email protected]> | 2018-09-16 20:27:00 -0400
committer | morpheus65535 <[email protected]> | 2018-09-16 20:33:04 -0400
commit    | 0f061f21226f91883c841f85ceef31b30981277a (patch)
tree      | a1350723ae688ccbae4d4ca564cc4175ccc73996 /libs/gitdb
parent    | 8b681d8a151a3b41d3aaa5bfdd7a082bdda7896c (diff)
download  | bazarr-0f061f21226f91883c841f85ceef31b30981277a.tar.gz
          | bazarr-0f061f21226f91883c841f85ceef31b30981277a.zip
Include dependencies and remove requirements.txt
Diffstat (limited to 'libs/gitdb')
-rw-r--r-- | libs/gitdb/__init__.py | 39
-rw-r--r-- | libs/gitdb/base.py | 315
-rw-r--r-- | libs/gitdb/const.py | 4
-rw-r--r-- | libs/gitdb/db/__init__.py | 11
-rw-r--r-- | libs/gitdb/db/base.py | 273
-rw-r--r-- | libs/gitdb/db/git.py | 85
-rw-r--r-- | libs/gitdb/db/loose.py | 262
-rw-r--r-- | libs/gitdb/db/mem.py | 112
-rw-r--r-- | libs/gitdb/db/pack.py | 207
-rw-r--r-- | libs/gitdb/db/ref.py | 82
-rw-r--r-- | libs/gitdb/exc.py | 46
-rw-r--r-- | libs/gitdb/fun.py | 781
-rw-r--r-- | libs/gitdb/pack.py | 1033
-rw-r--r-- | libs/gitdb/stream.py | 732
-rw-r--r-- | libs/gitdb/test/__init__.py | 4
-rw-r--r-- | libs/gitdb/test/lib.py | 208
-rw-r--r-- | libs/gitdb/test/test_base.py | 105
-rw-r--r-- | libs/gitdb/test/test_example.py | 43
-rw-r--r-- | libs/gitdb/test/test_pack.py | 255
-rw-r--r-- | libs/gitdb/test/test_stream.py | 164
-rw-r--r-- | libs/gitdb/test/test_util.py | 100
-rw-r--r-- | libs/gitdb/typ.py | 10
-rw-r--r-- | libs/gitdb/util.py | 401
-rw-r--r-- | libs/gitdb/utils/__init__.py | 0
-rw-r--r-- | libs/gitdb/utils/compat.py | 43
-rw-r--r-- | libs/gitdb/utils/encoding.py | 31
26 files changed, 5346 insertions, 0 deletions
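
The files added below are the upstream gitdb 2.0.4 object-database library, vendored wholesale. As a rough illustration of the API this commit pulls in (not part of the diff itself), reading and writing loose objects through the top-level `GitDB` class might look like the following sketch; the repository path is a placeholder and assumes a standard `.git/objects` layout.

```python
from io import BytesIO

from gitdb import GitDB, IStream

# Open a git-style object database rooted at the 'objects' directory
# (placeholder path -- point this at a real repository).
odb = GitDB("/path/to/repo/.git/objects")

# Write: wrap raw content in an IStream; store() fills in the binary sha.
data = b"hello gitdb"
istream = odb.store(IStream(b"blob", len(data), BytesIO(data)))
print(istream.hexsha.decode("ascii"))  # 40-character hex sha of the stored blob

# Read back: query methods take the 20-byte binary sha.
info = odb.info(istream.binsha)      # OInfo: binsha, type, size
stream = odb.stream(istream.binsha)  # OStream: same fields plus read()
assert info.type == b"blob" and stream.read() == data
```
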
diff --git a/libs/gitdb/__init__.py b/libs/gitdb/__init__.py new file mode 100644 index 000000000..344c3de01 --- /dev/null +++ b/libs/gitdb/__init__.py @@ -0,0 +1,39 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Initialize the object database module""" + +import sys +import os + +#{ Initialization + + +def _init_externals(): + """Initialize external projects by putting them into the path""" + for module in ('smmap',): + sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', module)) + + try: + __import__(module) + except ImportError: + raise ImportError("'%s' could not be imported, assure it is located in your PYTHONPATH" % module) + # END verify import + # END handel imports + +#} END initialization + +_init_externals() + +__author__ = "Sebastian Thiel" +__contact__ = "[email protected]" +__homepage__ = "https://github.com/gitpython-developers/gitdb" +version_info = (2, 0, 4) +__version__ = '.'.join(str(i) for i in version_info) + + +# default imports +from gitdb.base import * +from gitdb.db import * +from gitdb.stream import * diff --git a/libs/gitdb/base.py b/libs/gitdb/base.py new file mode 100644 index 000000000..42e71d0fa --- /dev/null +++ b/libs/gitdb/base.py @@ -0,0 +1,315 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module with basic data structures - they are designed to be lightweight and fast""" +from gitdb.util import bin_to_hex + +from gitdb.fun import ( + type_id_to_type_map, + type_to_type_id_map +) + +__all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo', + 'OStream', 'OPackStream', 'ODeltaPackStream', + 'IStream', 'InvalidOInfo', 'InvalidOStream') + +#{ ODB Bases + + +class OInfo(tuple): + + """Carries information about an object in an ODB, providing information + about the binary sha of the object, the type_string as well as the uncompressed size + in bytes. + + It can be accessed using tuple notation and using attribute access notation:: + + assert dbi[0] == dbi.binsha + assert dbi[1] == dbi.type + assert dbi[2] == dbi.size + + The type is designed to be as lightweight as possible.""" + __slots__ = tuple() + + def __new__(cls, sha, type, size): + return tuple.__new__(cls, (sha, type, size)) + + def __init__(self, *args): + tuple.__init__(self) + + #{ Interface + @property + def binsha(self): + """:return: our sha as binary, 20 bytes""" + return self[0] + + @property + def hexsha(self): + """:return: our sha, hex encoded, 40 bytes""" + return bin_to_hex(self[0]) + + @property + def type(self): + return self[1] + + @property + def type_id(self): + return type_to_type_id_map[self[1]] + + @property + def size(self): + return self[2] + #} END interface + + +class OPackInfo(tuple): + + """As OInfo, but provides a type_id property to retrieve the numerical type id, and + does not include a sha. + + Additionally, the pack_offset is the absolute offset into the packfile at which + all object information is located. 
The data_offset property points to the absolute + location in the pack at which that actual data stream can be found.""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size): + return tuple.__new__(cls, (packoffset, type, size)) + + def __init__(self, *args): + tuple.__init__(self) + + #{ Interface + + @property + def pack_offset(self): + return self[0] + + @property + def type(self): + return type_id_to_type_map[self[1]] + + @property + def type_id(self): + return self[1] + + @property + def size(self): + return self[2] + + #} END interface + + +class ODeltaPackInfo(OPackInfo): + + """Adds delta specific information, + Either the 20 byte sha which points to some object in the database, + or the negative offset from the pack_offset, so that pack_offset - delta_info yields + the pack offset of the base object""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size, delta_info): + return tuple.__new__(cls, (packoffset, type, size, delta_info)) + + #{ Interface + @property + def delta_info(self): + return self[3] + #} END interface + + +class OStream(OInfo): + + """Base for object streams retrieved from the database, providing additional + information about the stream. + Generally, ODB streams are read-only as objects are immutable""" + __slots__ = tuple() + + def __new__(cls, sha, type, size, stream, *args, **kwargs): + """Helps with the initialization of subclasses""" + return tuple.__new__(cls, (sha, type, size, stream)) + + def __init__(self, *args, **kwargs): + tuple.__init__(self) + + #{ Stream Reader Interface + + def read(self, size=-1): + return self[3].read(size) + + @property + def stream(self): + return self[3] + + #} END stream reader interface + + +class ODeltaStream(OStream): + + """Uses size info of its stream, delaying reads""" + + def __new__(cls, sha, type, size, stream, *args, **kwargs): + """Helps with the initialization of subclasses""" + return tuple.__new__(cls, (sha, type, size, stream)) + + #{ Stream Reader Interface + + @property + def size(self): + return self[3].size + + #} END stream reader interface + + +class OPackStream(OPackInfo): + + """Next to pack object information, a stream outputting an undeltified base object + is provided""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size, stream, *args): + """Helps with the initialization of subclasses""" + return tuple.__new__(cls, (packoffset, type, size, stream)) + + #{ Stream Reader Interface + def read(self, size=-1): + return self[3].read(size) + + @property + def stream(self): + return self[3] + #} END stream reader interface + + +class ODeltaPackStream(ODeltaPackInfo): + + """Provides a stream outputting the uncompressed offset delta information""" + __slots__ = tuple() + + def __new__(cls, packoffset, type, size, delta_info, stream): + return tuple.__new__(cls, (packoffset, type, size, delta_info, stream)) + + #{ Stream Reader Interface + def read(self, size=-1): + return self[4].read(size) + + @property + def stream(self): + return self[4] + #} END stream reader interface + + +class IStream(list): + + """Represents an input content stream to be fed into the ODB. It is mutable to allow + the ODB to record information about the operations outcome right in this instance. + + It provides interfaces for the OStream and a StreamReader to allow the instance + to blend in without prior conversion. 
+ + The only method your content stream must support is 'read'""" + __slots__ = tuple() + + def __new__(cls, type, size, stream, sha=None): + return list.__new__(cls, (sha, type, size, stream, None)) + + def __init__(self, type, size, stream, sha=None): + list.__init__(self, (sha, type, size, stream, None)) + + #{ Interface + @property + def hexsha(self): + """:return: our sha, hex encoded, 40 bytes""" + return bin_to_hex(self[0]) + + def _error(self): + """:return: the error that occurred when processing the stream, or None""" + return self[4] + + def _set_error(self, exc): + """Set this input stream to the given exc, may be None to reset the error""" + self[4] = exc + + error = property(_error, _set_error) + + #} END interface + + #{ Stream Reader Interface + + def read(self, size=-1): + """Implements a simple stream reader interface, passing the read call on + to our internal stream""" + return self[3].read(size) + + #} END stream reader interface + + #{ interface + + def _set_binsha(self, binsha): + self[0] = binsha + + def _binsha(self): + return self[0] + + binsha = property(_binsha, _set_binsha) + + def _type(self): + return self[1] + + def _set_type(self, type): + self[1] = type + + type = property(_type, _set_type) + + def _size(self): + return self[2] + + def _set_size(self, size): + self[2] = size + + size = property(_size, _set_size) + + def _stream(self): + return self[3] + + def _set_stream(self, stream): + self[3] = stream + + stream = property(_stream, _set_stream) + + #} END odb info interface + + +class InvalidOInfo(tuple): + + """Carries information about a sha identifying an object which is invalid in + the queried database. The exception attribute provides more information about + the cause of the issue""" + __slots__ = tuple() + + def __new__(cls, sha, exc): + return tuple.__new__(cls, (sha, exc)) + + def __init__(self, sha, exc): + tuple.__init__(self, (sha, exc)) + + @property + def binsha(self): + return self[0] + + @property + def hexsha(self): + return bin_to_hex(self[0]) + + @property + def error(self): + """:return: exception instance explaining the failure""" + return self[1] + + +class InvalidOStream(InvalidOInfo): + + """Carries information about an invalid ODB stream""" + __slots__ = tuple() + +#} END ODB Bases diff --git a/libs/gitdb/const.py b/libs/gitdb/const.py new file mode 100644 index 000000000..6391d796f --- /dev/null +++ b/libs/gitdb/const.py @@ -0,0 +1,4 @@ +BYTE_SPACE = b' ' +NULL_BYTE = b'\0' +NULL_HEX_SHA = "0" * 40 +NULL_BIN_SHA = NULL_BYTE * 20 diff --git a/libs/gitdb/db/__init__.py b/libs/gitdb/db/__init__.py new file mode 100644 index 000000000..0a2a46a64 --- /dev/null +++ b/libs/gitdb/db/__init__.py @@ -0,0 +1,11 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php + +from gitdb.db.base import * +from gitdb.db.loose import * +from gitdb.db.mem import * +from gitdb.db.pack import * +from gitdb.db.git import * +from gitdb.db.ref import * diff --git a/libs/gitdb/db/base.py b/libs/gitdb/db/base.py new file mode 100644 index 000000000..2d7b9fa8d --- /dev/null +++ b/libs/gitdb/db/base.py @@ -0,0 +1,273 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains implementations of database retrieveing objects""" +from 
gitdb.util import ( + join, + LazyMixin, + hex_to_bin +) + +from gitdb.utils.encoding import force_text +from gitdb.exc import ( + BadObject, + AmbiguousObjectName +) + +from itertools import chain +from functools import reduce + + +__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB') + + +class ObjectDBR(object): + + """Defines an interface for object database lookup. + Objects are identified either by their 20 byte bin sha""" + + def __contains__(self, sha): + return self.has_obj + + #{ Query Interface + def has_object(self, sha): + """ + :return: True if the object identified by the given 20 bytes + binary sha is contained in the database""" + raise NotImplementedError("To be implemented in subclass") + + def info(self, sha): + """ :return: OInfo instance + :param sha: bytes binary sha + :raise BadObject:""" + raise NotImplementedError("To be implemented in subclass") + + def stream(self, sha): + """:return: OStream instance + :param sha: 20 bytes binary sha + :raise BadObject:""" + raise NotImplementedError("To be implemented in subclass") + + def size(self): + """:return: amount of objects in this database""" + raise NotImplementedError() + + def sha_iter(self): + """Return iterator yielding 20 byte shas for all objects in this data base""" + raise NotImplementedError() + + #} END query interface + + +class ObjectDBW(object): + + """Defines an interface to create objects in the database""" + + def __init__(self, *args, **kwargs): + self._ostream = None + + #{ Edit Interface + def set_ostream(self, stream): + """ + Adjusts the stream to which all data should be sent when storing new objects + + :param stream: if not None, the stream to use, if None the default stream + will be used. + :return: previously installed stream, or None if there was no override + :raise TypeError: if the stream doesn't have the supported functionality""" + cstream = self._ostream + self._ostream = stream + return cstream + + def ostream(self): + """ + :return: overridden output stream this instance will write to, or None + if it will write to the default stream""" + return self._ostream + + def store(self, istream): + """ + Create a new object in the database + :return: the input istream object with its sha set to its corresponding value + + :param istream: IStream compatible instance. If its sha is already set + to a value, the object will just be stored in the our database format, + in which case the input stream is expected to be in object format ( header + contents ). 
+ :raise IOError: if data could not be written""" + raise NotImplementedError("To be implemented in subclass") + + #} END edit interface + + +class FileDBBase(object): + + """Provides basic facilities to retrieve files of interest, including + caching facilities to help mapping hexsha's to objects""" + + def __init__(self, root_path): + """Initialize this instance to look for its files at the given root path + All subsequent operations will be relative to this path + :raise InvalidDBRoot: + **Note:** The base will not perform any accessablity checking as the base + might not yet be accessible, but become accessible before the first + access.""" + super(FileDBBase, self).__init__() + self._root_path = root_path + + #{ Interface + def root_path(self): + """:return: path at which this db operates""" + return self._root_path + + def db_path(self, rela_path): + """ + :return: the given relative path relative to our database root, allowing + to pontentially access datafiles""" + return join(self._root_path, force_text(rela_path)) + #} END interface + + +class CachingDB(object): + + """A database which uses caches to speed-up access""" + + #{ Interface + def update_cache(self, force=False): + """ + Call this method if the underlying data changed to trigger an update + of the internal caching structures. + + :param force: if True, the update must be performed. Otherwise the implementation + may decide not to perform an update if it thinks nothing has changed. + :return: True if an update was performed as something change indeed""" + + # END interface + + +def _databases_recursive(database, output): + """Fill output list with database from db, in order. Deals with Loose, Packed + and compound databases.""" + if isinstance(database, CompoundDB): + dbs = database.databases() + output.extend(db for db in dbs if not isinstance(db, CompoundDB)) + for cdb in (db for db in dbs if isinstance(db, CompoundDB)): + _databases_recursive(cdb, output) + else: + output.append(database) + # END handle database type + + +class CompoundDB(ObjectDBR, LazyMixin, CachingDB): + + """A database which delegates calls to sub-databases. + + Databases are stored in the lazy-loaded _dbs attribute. 
+ Define _set_cache_ to update it with your databases""" + + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + elif attr == '_db_cache': + self._db_cache = dict() + else: + super(CompoundDB, self)._set_cache_(attr) + + def _db_query(self, sha): + """:return: database containing the given 20 byte sha + :raise BadObject:""" + # most databases use binary representations, prevent converting + # it every time a database is being queried + try: + return self._db_cache[sha] + except KeyError: + pass + # END first level cache + + for db in self._dbs: + if db.has_object(sha): + self._db_cache[sha] = db + return db + # END for each database + raise BadObject(sha) + + #{ ObjectDBR interface + + def has_object(self, sha): + try: + self._db_query(sha) + return True + except BadObject: + return False + # END handle exceptions + + def info(self, sha): + return self._db_query(sha).info(sha) + + def stream(self, sha): + return self._db_query(sha).stream(sha) + + def size(self): + """:return: total size of all contained databases""" + return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0) + + def sha_iter(self): + return chain(*(db.sha_iter() for db in self._dbs)) + + #} END object DBR Interface + + #{ Interface + + def databases(self): + """:return: tuple of database instances we use for lookups""" + return tuple(self._dbs) + + def update_cache(self, force=False): + # something might have changed, clear everything + self._db_cache.clear() + stat = False + for db in self._dbs: + if isinstance(db, CachingDB): + stat |= db.update_cache(force) + # END if is caching db + # END for each database to update + return stat + + def partial_to_complete_sha_hex(self, partial_hexsha): + """ + :return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str) + :param partial_hexsha: hexsha with less than 40 byte + :raise AmbiguousObjectName: """ + databases = list() + _databases_recursive(self, databases) + partial_hexsha = force_text(partial_hexsha) + len_partial_hexsha = len(partial_hexsha) + if len_partial_hexsha % 2 != 0: + partial_binsha = hex_to_bin(partial_hexsha + "0") + else: + partial_binsha = hex_to_bin(partial_hexsha) + # END assure successful binary conversion + + candidate = None + for db in databases: + full_bin_sha = None + try: + if hasattr(db, 'partial_to_complete_sha_hex'): + full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha) + else: + full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha) + # END handle database type + except BadObject: + continue + # END ignore bad objects + if full_bin_sha: + if candidate and candidate != full_bin_sha: + raise AmbiguousObjectName(partial_hexsha) + candidate = full_bin_sha + # END handle candidate + # END for each db + if not candidate: + raise BadObject(partial_binsha) + return candidate + + #} END interface diff --git a/libs/gitdb/db/git.py b/libs/gitdb/db/git.py new file mode 100644 index 000000000..7a43d7235 --- /dev/null +++ b/libs/gitdb/db/git.py @@ -0,0 +1,85 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from gitdb.db.base import ( + CompoundDB, + ObjectDBW, + FileDBBase +) + +from gitdb.db.loose import LooseObjectDB +from gitdb.db.pack import PackedDB +from gitdb.db.ref import ReferenceDB + +from gitdb.exc import InvalidDBRoot + +import os + +__all__ = ('GitDB', ) + + +class GitDB(FileDBBase, ObjectDBW, 
CompoundDB): + + """A git-style object database, which contains all objects in the 'objects' + subdirectory + + ``IMPORTANT``: The usage of this implementation is highly discouraged as it fails to release file-handles. + This can be a problem with long-running processes and/or big repositories. + """ + # Configuration + PackDBCls = PackedDB + LooseDBCls = LooseObjectDB + ReferenceDBCls = ReferenceDB + + # Directories + packs_dir = 'pack' + loose_dir = '' + alternates_dir = os.path.join('info', 'alternates') + + def __init__(self, root_path): + """Initialize ourselves on a git objects directory""" + super(GitDB, self).__init__(root_path) + + def _set_cache_(self, attr): + if attr == '_dbs' or attr == '_loose_db': + self._dbs = list() + loose_db = None + for subpath, dbcls in ((self.packs_dir, self.PackDBCls), + (self.loose_dir, self.LooseDBCls), + (self.alternates_dir, self.ReferenceDBCls)): + path = self.db_path(subpath) + if os.path.exists(path): + self._dbs.append(dbcls(path)) + if dbcls is self.LooseDBCls: + loose_db = self._dbs[-1] + # END remember loose db + # END check path exists + # END for each db type + + # should have at least one subdb + if not self._dbs: + raise InvalidDBRoot(self.root_path()) + # END handle error + + # we the first one should have the store method + assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + + # finally set the value + self._loose_db = loose_db + else: + super(GitDB, self)._set_cache_(attr) + # END handle attrs + + #{ ObjectDBW interface + + def store(self, istream): + return self._loose_db.store(istream) + + def ostream(self): + return self._loose_db.ostream() + + def set_ostream(self, ostream): + return self._loose_db.set_ostream(ostream) + + #} END objectdbw interface diff --git a/libs/gitdb/db/loose.py b/libs/gitdb/db/loose.py new file mode 100644 index 000000000..192c524af --- /dev/null +++ b/libs/gitdb/db/loose.py @@ -0,0 +1,262 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from gitdb.db.base import ( + FileDBBase, + ObjectDBR, + ObjectDBW +) + + +from gitdb.exc import ( + BadObject, + AmbiguousObjectName +) + +from gitdb.stream import ( + DecompressMemMapReader, + FDCompressedSha1Writer, + FDStream, + Sha1Writer +) + +from gitdb.base import ( + OStream, + OInfo +) + +from gitdb.util import ( + file_contents_ro_filepath, + ENOENT, + hex_to_bin, + bin_to_hex, + exists, + chmod, + isdir, + isfile, + remove, + mkdir, + rename, + dirname, + basename, + join +) + +from gitdb.fun import ( + chunk_size, + loose_object_header_info, + write_object, + stream_copy +) + +from gitdb.utils.compat import MAXSIZE +from gitdb.utils.encoding import force_bytes + +import tempfile +import os + + +__all__ = ('LooseObjectDB', ) + + +class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): + + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + # On windows we need to keep it writable, otherwise it cannot be removed + # either + new_objects_mode = int("444", 8) + if os.name == 'nt': + new_objects_mode = int("644", 8) + + def __init__(self, root_path): + super(LooseObjectDB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # Depending on the root, this might work 
for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: 20 byte binary sha1 string which matches the given name uniquely + :param name: hexadecimal partial name (bytes or ascii string) + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for binsha in self.sha_iter(): + if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)): + # it can't ever find the same object twice + if candidate is not None: + raise AmbiguousObjectName(partial_hexsha) + candidate = binsha + # END for each object + if candidate is None: + raise BadObject(partial_hexsha) + return candidate + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(bin_to_hex(sha))) + try: + return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) + except OSError as e: + if e.errno != ENOENT: + # try again without noatime + try: + return file_contents_ro_filepath(db_path) + except OSError: + raise BadObject(sha) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise BadObject(sha) + # END handle error + # END exception handling + + def set_ostream(self, stream): + """:raise TypeError: if the stream does not support the Sha1Writer interface""" + if stream is not None and not isinstance(stream, Sha1Writer): + raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) + return super(LooseObjectDB, self).set_ostream(stream) + + def info(self, sha): + m = self._map_loose_object(sha) + try: + typ, size = loose_object_header_info(m) + return OInfo(sha, typ, size) + finally: + if hasattr(m, 'close'): + m.close() + # END assure release of system resources + + def stream(self, sha): + m = self._map_loose_object(sha) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True) + return OStream(sha, type, size, stream) + + def has_object(self, sha): + try: + self.readable_db_object_path(bin_to_hex(sha)) + return True + except BadObject: + return False + # END check existence + + def store(self, istream): + """note: The sha we produce will be hex by nature""" + tmp_path = None + writer = self.ostream() + if writer is None: + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + + if istream.binsha is None: + writer = FDCompressedSha1Writer(fd) + else: + writer = FDStream(fd) + # END handle direct stream copies + # END handle custom writer + + try: + try: + if istream.binsha is not None: + # copy as much as possible, the actual uncompressed item size might + # be smaller than the compressed version + 
stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size) + else: + # write object with header, we have to make a new one + write_object(istream.type, istream.size, istream.read, writer.write, + chunk_size=self.stream_chunk_size) + # END handle direct stream copies + finally: + if tmp_path: + writer.close() + # END assure target stream is closed + except: + if tmp_path: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + + hexsha = None + if istream.binsha: + hexsha = istream.hexsha + else: + hexsha = writer.sha(as_hex=True) + # END handle sha + + if tmp_path: + obj_path = self.db_path(self.object_path(hexsha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + # rename onto existing doesn't work on windows + if os.name == 'nt': + if isfile(obj_path): + remove(tmp_path) + else: + rename(tmp_path, obj_path) + # end rename only if needed + else: + rename(tmp_path, obj_path) + # END handle win32 + + # make sure its readable for all ! It started out as rw-- tmp file + # but needs to be rwrr + chmod(obj_path, self.new_objects_mode) + # END handle dry_run + + istream.binsha = hex_to_bin(hexsha) + return istream + + def sha_iter(self): + # find all files which look like an object, extract sha from there + for root, dirs, files in os.walk(self.root_path()): + root_base = basename(root) + if len(root_base) != 2: + continue + + for f in files: + if len(f) != 38: + continue + yield hex_to_bin(root_base + f) + # END for each file + # END for each walk iteration + + def size(self): + return len(tuple(self.sha_iter())) diff --git a/libs/gitdb/db/mem.py b/libs/gitdb/db/mem.py new file mode 100644 index 000000000..871133468 --- /dev/null +++ b/libs/gitdb/db/mem.py @@ -0,0 +1,112 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains the MemoryDatabase implementation""" +from gitdb.db.loose import LooseObjectDB +from gitdb.db.base import ( + ObjectDBR, + ObjectDBW +) + +from gitdb.base import ( + OStream, + IStream, +) + +from gitdb.exc import ( + BadObject, + UnsupportedOperation +) + +from gitdb.stream import ( + ZippedStoreShaWriter, + DecompressMemMapReader, +) + +from io import BytesIO + +__all__ = ("MemoryDB", ) + + +class MemoryDB(ObjectDBR, ObjectDBW): + + """A memory database stores everything to memory, providing fast IO and object + retrieval. 
It should be used to buffer results and obtain SHAs before writing + it to the actual physical storage, as it allows to query whether object already + exists in the target storage before introducing actual IO""" + + def __init__(self): + super(MemoryDB, self).__init__() + self._db = LooseObjectDB("path/doesnt/matter") + + # maps 20 byte shas to their OStream objects + self._cache = dict() + + def set_ostream(self, stream): + raise UnsupportedOperation("MemoryDB's always stream into memory") + + def store(self, istream): + zstream = ZippedStoreShaWriter() + self._db.set_ostream(zstream) + + istream = self._db.store(istream) + zstream.close() # close to flush + zstream.seek(0) + + # don't provide a size, the stream is written in object format, hence the + # header needs decompression + decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False) + self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream) + + return istream + + def has_object(self, sha): + return sha in self._cache + + def info(self, sha): + # we always return streams, which are infos as well + return self.stream(sha) + + def stream(self, sha): + try: + ostream = self._cache[sha] + # rewind stream for the next one to read + ostream.stream.seek(0) + return ostream + except KeyError: + raise BadObject(sha) + # END exception handling + + def size(self): + return len(self._cache) + + def sha_iter(self): + try: + return self._cache.iterkeys() + except AttributeError: + return self._cache.keys() + + #{ Interface + def stream_copy(self, sha_iter, odb): + """Copy the streams as identified by sha's yielded by sha_iter into the given odb + The streams will be copied directly + **Note:** the object will only be written if it did not exist in the target db + :return: amount of streams actually copied into odb. 
If smaller than the amount + of input shas, one or more objects did already exist in odb""" + count = 0 + for sha in sha_iter: + if odb.has_object(sha): + continue + # END check object existence + + ostream = self.stream(sha) + # compressed data including header + sio = BytesIO(ostream.stream.data()) + istream = IStream(ostream.type, ostream.size, sio, sha) + + odb.store(istream) + count += 1 + # END for each sha + return count + #} END interface diff --git a/libs/gitdb/db/pack.py b/libs/gitdb/db/pack.py new file mode 100644 index 000000000..6b03d8383 --- /dev/null +++ b/libs/gitdb/db/pack.py @@ -0,0 +1,207 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module containing a database to deal with packs""" +from gitdb.db.base import ( + FileDBBase, + ObjectDBR, + CachingDB +) + +from gitdb.util import LazyMixin + +from gitdb.exc import ( + BadObject, + UnsupportedOperation, + AmbiguousObjectName +) + +from gitdb.pack import PackEntity +from gitdb.utils.compat import xrange + +from functools import reduce + +import os +import glob + +__all__ = ('PackedDB', ) + +#{ Utilities + + +class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): + + """A database operating on a set of object packs""" + + # sort the priority list every N queries + # Higher values are better, performance tests don't show this has + # any effect, but it should have one + _sort_interval = 500 + + def __init__(self, root_path): + super(PackedDB, self).__init__(root_path) + # list of lists with three items: + # * hits - number of times the pack was hit with a request + # * entity - Pack entity instance + # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query + # self._entities = list() # lazy loaded list + self._hit_count = 0 # amount of hits + self._st_mtime = 0 # last modification data of our root path + + def _set_cache_(self, attr): + if attr == '_entities': + self._entities = list() + self.update_cache(force=True) + # END handle entities initialization + + def _sort_entities(self): + self._entities.sort(key=lambda l: l[0], reverse=True) + + def _pack_info(self, sha): + """:return: tuple(entity, index) for an item at the given sha + :param sha: 20 or 40 byte sha + :raise BadObject: + **Note:** This method is not thread-safe, but may be hit in multi-threaded + operation. The worst thing that can happen though is a counter that + was not incremented, or the list being in wrong order. So we safe + the time for locking here, lets see how that goes""" + # presort ? 
+ if self._hit_count % self._sort_interval == 0: + self._sort_entities() + # END update sorting + + for item in self._entities: + index = item[2](sha) + if index is not None: + item[0] += 1 # one hit for you + self._hit_count += 1 # general hit count + return (item[1], index) + # END index found in pack + # END for each item + + # no hit, see whether we have to update packs + # NOTE: considering packs don't change very often, we safe this call + # and leave it to the super-caller to trigger that + raise BadObject(sha) + + #{ Object DB Read + + def has_object(self, sha): + try: + self._pack_info(sha) + return True + except BadObject: + return False + # END exception handling + + def info(self, sha): + entity, index = self._pack_info(sha) + return entity.info_at_index(index) + + def stream(self, sha): + entity, index = self._pack_info(sha) + return entity.stream_at_index(index) + + def sha_iter(self): + for entity in self.entities(): + index = entity.index() + sha_by_index = index.sha + for index in xrange(index.size()): + yield sha_by_index(index) + # END for each index + # END for each entity + + def size(self): + sizes = [item[1].index().size() for item in self._entities] + return reduce(lambda x, y: x + y, sizes, 0) + + #} END object db read + + #{ object db write + + def store(self, istream): + """Storing individual objects is not feasible as a pack is designed to + hold multiple objects. Writing or rewriting packs for single objects is + inefficient""" + raise UnsupportedOperation() + + #} END object db write + + #{ Interface + + def update_cache(self, force=False): + """ + Update our cache with the acutally existing packs on disk. Add new ones, + and remove deleted ones. We keep the unchanged ones + + :param force: If True, the cache will be updated even though the directory + does not appear to have changed according to its modification timestamp. + :return: True if the packs have been updated so there is new information, + False if there was no change to the pack database""" + stat = os.stat(self.root_path()) + if not force and stat.st_mtime <= self._st_mtime: + return False + # END abort early on no change + self._st_mtime = stat.st_mtime + + # packs are supposed to be prefixed with pack- by git-convention + # get all pack files, figure out what changed + pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) + our_pack_files = set(item[1].pack().path() for item in self._entities) + + # new packs + for pack_file in (pack_files - our_pack_files): + # init the hit-counter/priority with the size, a good measure for hit- + # probability. 
Its implemented so that only 12 bytes will be read + entity = PackEntity(pack_file) + self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) + # END for each new packfile + + # removed packs + for pack_file in (our_pack_files - pack_files): + del_index = -1 + for i, item in enumerate(self._entities): + if item[1].pack().path() == pack_file: + del_index = i + break + # END found index + # END for each entity + assert del_index != -1 + del(self._entities[del_index]) + # END for each removed pack + + # reinitialize prioritiess + self._sort_entities() + return True + + def entities(self): + """:return: list of pack entities operated upon by this database""" + return [item[1] for item in self._entities] + + def partial_to_complete_sha(self, partial_binsha, canonical_length): + """:return: 20 byte sha as inferred by the given partial binary sha + :param partial_binsha: binary sha with less than 20 bytes + :param canonical_length: length of the corresponding canonical representation. + It is required as binary sha's cannot display whether the original hex sha + had an odd or even number of characters + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for item in self._entities: + item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length) + if item_index is not None: + sha = item[1].index().sha(item_index) + if candidate and candidate != sha: + raise AmbiguousObjectName(partial_binsha) + candidate = sha + # END handle full sha could be found + # END for each entity + + if candidate: + return candidate + + # still not found ? + raise BadObject(partial_binsha) + + #} END interface diff --git a/libs/gitdb/db/ref.py b/libs/gitdb/db/ref.py new file mode 100644 index 000000000..94a2f01f8 --- /dev/null +++ b/libs/gitdb/db/ref.py @@ -0,0 +1,82 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +import codecs +from gitdb.db.base import ( + CompoundDB, +) + +__all__ = ('ReferenceDB', ) + + +class ReferenceDB(CompoundDB): + + """A database consisting of database referred to in a file""" + + # Configuration + # Specifies the object database to use for the paths found in the alternates + # file. 
If None, it defaults to the GitDB + ObjectDBCls = None + + def __init__(self, ref_file): + super(ReferenceDB, self).__init__() + self._ref_file = ref_file + + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + self._update_dbs_from_ref_file() + else: + super(ReferenceDB, self)._set_cache_(attr) + # END handle attrs + + def _update_dbs_from_ref_file(self): + dbcls = self.ObjectDBCls + if dbcls is None: + # late import + from gitdb.db.git import GitDB + dbcls = GitDB + # END get db type + + # try to get as many as possible, don't fail if some are unavailable + ref_paths = list() + try: + with codecs.open(self._ref_file, 'r', encoding="utf-8") as f: + ref_paths = [l.strip() for l in f] + except (OSError, IOError): + pass + # END handle alternates + + ref_paths_set = set(ref_paths) + cur_ref_paths_set = set(db.root_path() for db in self._dbs) + + # remove existing + for path in (cur_ref_paths_set - ref_paths_set): + for i, db in enumerate(self._dbs[:]): + if db.root_path() == path: + del(self._dbs[i]) + continue + # END del matching db + # END for each path to remove + + # add new + # sort them to maintain order + added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p)) + for path in added_paths: + try: + db = dbcls(path) + # force an update to verify path + if isinstance(db, CompoundDB): + db.databases() + # END verification + self._dbs.append(db) + except Exception: + # ignore invalid paths or issues + pass + # END for each path to add + + def update_cache(self, force=False): + # re-read alternates and update databases + self._update_dbs_from_ref_file() + return super(ReferenceDB, self).update_cache(force) diff --git a/libs/gitdb/exc.py b/libs/gitdb/exc.py new file mode 100644 index 000000000..947e5d8bf --- /dev/null +++ b/libs/gitdb/exc.py @@ -0,0 +1,46 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module with common exceptions""" +from gitdb.util import to_hex_sha + + +class ODBError(Exception): + """All errors thrown by the object database""" + + +class InvalidDBRoot(ODBError): + """Thrown if an object database cannot be initialized at the given path""" + + +class BadObject(ODBError): + """The object with the given SHA does not exist. 
Instantiate with the + failed sha""" + + def __str__(self): + return "BadObject: %s" % to_hex_sha(self.args[0]) + + +class BadName(ODBError): + """A name provided to rev_parse wasn't understood""" + + def __str__(self): + return "Ref '%s' did not resolve to an object" % self.args[0] + + +class ParseError(ODBError): + """Thrown if the parsing of a file failed due to an invalid format""" + + +class AmbiguousObjectName(ODBError): + """Thrown if a possibly shortened name does not uniquely represent a single object + in the database""" + + +class BadObjectType(ODBError): + """The object had an unsupported type""" + + +class UnsupportedOperation(ODBError): + """Thrown if the given operation cannot be supported by the object database""" diff --git a/libs/gitdb/fun.py b/libs/gitdb/fun.py new file mode 100644 index 000000000..8ca38c867 --- /dev/null +++ b/libs/gitdb/fun.py @@ -0,0 +1,781 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains basic c-functions which usually contain performance critical code +Keeping this code separate from the beginning makes it easier to out-source +it into c later, if required""" + +import zlib +from gitdb.util import byte_ord +decompressobj = zlib.decompressobj + +import mmap +from itertools import islice +from functools import reduce + +from gitdb.const import NULL_BYTE, BYTE_SPACE +from gitdb.utils.encoding import force_text +from gitdb.utils.compat import izip, buffer, xrange, PY3 +from gitdb.typ import ( + str_blob_type, + str_commit_type, + str_tree_type, + str_tag_type, +) + +from io import StringIO + +# INVARIANTS +OFS_DELTA = 6 +REF_DELTA = 7 +delta_types = (OFS_DELTA, REF_DELTA) + +type_id_to_type_map = { + 0: b'', # EXT 1 + 1: str_commit_type, + 2: str_tree_type, + 3: str_blob_type, + 4: str_tag_type, + 5: b'', # EXT 2 + OFS_DELTA: "OFS_DELTA", # OFFSET DELTA + REF_DELTA: "REF_DELTA" # REFERENCE DELTA +} + +type_to_type_id_map = { + str_commit_type: 1, + str_tree_type: 2, + str_blob_type: 3, + str_tag_type: 4, + "OFS_DELTA": OFS_DELTA, + "REF_DELTA": REF_DELTA, +} + +# used when dealing with larger streams +chunk_size = 1000 * mmap.PAGESIZE + +__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info', + 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data', + 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header') + + +#{ Structures + +def _set_delta_rbound(d, size): + """Truncate the given delta to the given size + :param size: size relative to our target offset, may not be 0, must be smaller or equal + to our size + :return: d""" + d.ts = size + + # NOTE: data is truncated automatically when applying the delta + # MUST NOT DO THIS HERE + return d + + +def _move_delta_lbound(d, bytes): + """Move the delta by the given amount of bytes, reducing its size so that its + right bound stays static + :param bytes: amount of bytes to move, must be smaller than delta size + :return: d""" + if bytes == 0: + return + + d.to += bytes + d.so += bytes + d.ts -= bytes + if d.data is not None: + d.data = d.data[bytes:] + # END handle data + + return d + + +def delta_duplicate(src): + return DeltaChunk(src.to, src.ts, src.so, src.data) + + +def delta_chunk_apply(dc, bbuf, write): + """Apply own data to the target buffer + :param bbuf: buffer providing source bytes for copy operations + :param write: write 
method to call with data to write""" + if dc.data is None: + # COPY DATA FROM SOURCE + write(buffer(bbuf, dc.so, dc.ts)) + else: + # APPEND DATA + # whats faster: if + 4 function calls or just a write with a slice ? + # Considering data can be larger than 127 bytes now, it should be worth it + if dc.ts < len(dc.data): + write(dc.data[:dc.ts]) + else: + write(dc.data) + # END handle truncation + # END handle chunk mode + + +class DeltaChunk(object): + + """Represents a piece of a delta, it can either add new data, or copy existing + one from a source buffer""" + __slots__ = ( + 'to', # start offset in the target buffer in bytes + 'ts', # size of this chunk in the target buffer in bytes + 'so', # start offset in the source buffer in bytes or None + 'data', # chunk of bytes to be added to the target buffer, + # DeltaChunkList to use as base, or None + ) + + def __init__(self, to, ts, so, data): + self.to = to + self.ts = ts + self.so = so + self.data = data + + def __repr__(self): + return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "") + + #{ Interface + + def rbound(self): + return self.to + self.ts + + def has_data(self): + """:return: True if the instance has data to add to the target stream""" + return self.data is not None + + #} END interface + + +def _closest_index(dcl, absofs): + """:return: index at which the given absofs should be inserted. The index points + to the DeltaChunk with a target buffer absofs that equals or is greater than + absofs. + **Note:** global method for performance only, it belongs to DeltaChunkList""" + lo = 0 + hi = len(dcl) + while lo < hi: + mid = (lo + hi) / 2 + dc = dcl[mid] + if dc.to > absofs: + hi = mid + elif dc.rbound() > absofs or dc.to == absofs: + return mid + else: + lo = mid + 1 + # END handle bound + # END for each delta absofs + return len(dcl) - 1 + + +def delta_list_apply(dcl, bbuf, write): + """Apply the chain's changes and write the final result using the passed + write function. + :param bbuf: base buffer containing the base of all deltas contained in this + list. It will only be used if the chunk in question does not have a base + chain. + :param write: function taking a string of bytes to write to the output""" + for dc in dcl: + delta_chunk_apply(dc, bbuf, write) + # END for each dc + + +def delta_list_slice(dcl, absofs, size, ndcl): + """:return: Subsection of this list at the given absolute offset, with the given + size in bytes. + :return: None""" + cdi = _closest_index(dcl, absofs) # delta start index + cd = dcl[cdi] + slen = len(dcl) + lappend = ndcl.append + + if cd.to != absofs: + tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data) + _move_delta_lbound(tcd, absofs - cd.to) + tcd.ts = min(tcd.ts, size) + lappend(tcd) + size -= tcd.ts + cdi += 1 + # END lbound overlap handling + + while cdi < slen and size: + # are we larger than the current block + cd = dcl[cdi] + if cd.ts <= size: + lappend(DeltaChunk(cd.to, cd.ts, cd.so, cd.data)) + size -= cd.ts + else: + tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data) + tcd.ts = size + lappend(tcd) + size -= tcd.ts + break + # END hadle size + cdi += 1 + # END for each chunk + + +class DeltaChunkList(list): + + """List with special functionality to deal with DeltaChunks. + There are two types of lists we represent. The one was created bottom-up, working + towards the latest delta, the other kind was created top-down, working from the + latest delta down to the earliest ancestor. 
This attribute is queryable + after all processing with is_reversed.""" + + __slots__ = tuple() + + def rbound(self): + """:return: rightmost extend in bytes, absolute""" + if len(self) == 0: + return 0 + return self[-1].rbound() + + def lbound(self): + """:return: leftmost byte at which this chunklist starts""" + if len(self) == 0: + return 0 + return self[0].to + + def size(self): + """:return: size of bytes as measured by our delta chunks""" + return self.rbound() - self.lbound() + + def apply(self, bbuf, write): + """Only used by public clients, internally we only use the global routines + for performance""" + return delta_list_apply(self, bbuf, write) + + def compress(self): + """Alter the list to reduce the amount of nodes. Currently we concatenate + add-chunks + :return: self""" + slen = len(self) + if slen < 2: + return self + i = 0 + + first_data_index = None + while i < slen: + dc = self[i] + i += 1 + if dc.data is None: + if first_data_index is not None and i - 2 - first_data_index > 1: + # if first_data_index is not None: + nd = StringIO() # new data + so = self[first_data_index].to # start offset in target buffer + for x in xrange(first_data_index, i - 1): + xdc = self[x] + nd.write(xdc.data[:xdc.ts]) + # END collect data + + del(self[first_data_index:i - 1]) + buf = nd.getvalue() + self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf)) + + slen = len(self) + i = first_data_index + 1 + + # END concatenate data + first_data_index = None + continue + # END skip non-data chunks + + if first_data_index is None: + first_data_index = i - 1 + # END iterate list + + # if slen_orig != len(self): + # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100) + return self + + def check_integrity(self, target_size=-1): + """Verify the list has non-overlapping chunks only, and the total size matches + target_size + :param target_size: if not -1, the total size of the chain must be target_size + :raise AssertionError: if the size doen't match""" + if target_size > -1: + assert self[-1].rbound() == target_size + assert reduce(lambda x, y: x + y, (d.ts for d in self), 0) == target_size + # END target size verification + + if len(self) < 2: + return + + # check data + for dc in self: + assert dc.ts > 0 + if dc.has_data(): + assert len(dc.data) >= dc.ts + # END for each dc + + left = islice(self, 0, len(self) - 1) + right = iter(self) + right.next() + # this is very pythonic - we might have just use index based access here, + # but this could actually be faster + for lft, rgt in izip(left, right): + assert lft.rbound() == rgt.to + assert lft.to + lft.ts == rgt.to + # END for each pair + + +class TopdownDeltaChunkList(DeltaChunkList): + + """Represents a list which is generated by feeding its ancestor streams one by + one""" + __slots__ = tuple() + + def connect_with_next_base(self, bdcl): + """Connect this chain with the next level of our base delta chunklist. + The goal in this game is to mark as many of our chunks rigid, hence they + cannot be changed by any of the upcoming bases anymore. Once all our + chunks are marked like that, we can stop all processing + :param bdcl: data chunk list being one of our bases. They must be fed in + consequtively and in order, towards the earliest ancestor delta + :return: True if processing was done. 
Use it to abort processing of + remaining streams if False is returned""" + nfc = 0 # number of frozen chunks + dci = 0 # delta chunk index + slen = len(self) # len of self + ccl = list() # temporary list + while dci < slen: + dc = self[dci] + dci += 1 + + # all add-chunks which are already topmost don't need additional processing + if dc.data is not None: + nfc += 1 + continue + # END skip add chunks + + # copy chunks + # integrate the portion of the base list into ourselves. Lists + # dont support efficient insertion ( just one at a time ), but for now + # we live with it. Internally, its all just a 32/64bit pointer, and + # the portions of moved memory should be smallish. Maybe we just rebuild + # ourselves in order to reduce the amount of insertions ... + del(ccl[:]) + delta_list_slice(bdcl, dc.so, dc.ts, ccl) + + # move the target bounds into place to match with our chunk + ofs = dc.to - dc.so + for cdc in ccl: + cdc.to += ofs + # END update target bounds + + if len(ccl) == 1: + self[dci - 1] = ccl[0] + else: + # maybe try to compute the expenses here, and pick the right algorithm + # It would normally be faster than copying everything physically though + # TODO: Use a deque here, and decide by the index whether to extend + # or extend left ! + post_dci = self[dci:] + del(self[dci - 1:]) # include deletion of dc + self.extend(ccl) + self.extend(post_dci) + + slen = len(self) + dci += len(ccl) - 1 # deleted dc, added rest + + # END handle chunk replacement + # END for each chunk + + if nfc == slen: + return False + # END handle completeness + return True + + +#} END structures + +#{ Routines + +def is_loose_object(m): + """ + :return: True the file contained in memory map m appears to be a loose object. + Only the first two bytes are needed""" + b0, b1 = map(ord, m[:2]) + word = (b0 << 8) + b1 + return b0 == 0x78 and (word % 31) == 0 + + +def loose_object_header_info(m): + """ + :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the + object as well as its uncompressed size in bytes. + :param m: memory map from which to read the compressed object data""" + decompress_size = 8192 # is used in cgit as well + hdr = decompressobj().decompress(m, decompress_size) + type_name, size = hdr[:hdr.find(NULL_BYTE)].split(BYTE_SPACE) + + return type_name, int(size) + + +def pack_object_header_info(data): + """ + :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset) + The type_id should be interpreted according to the ``type_id_to_type_map`` map + The byte-offset specifies the start of the actual zlib compressed datastream + :param m: random-access memory, like a string or memory map""" + c = byte_ord(data[0]) # first byte + i = 1 # next char to read + type_id = (c >> 4) & 7 # numeric type + size = c & 15 # starting size + s = 4 # starting bit-shift size + if PY3: + while c & 0x80: + c = byte_ord(data[i]) + i += 1 + size += (c & 0x7f) << s + s += 7 + # END character loop + else: + while c & 0x80: + c = ord(data[i]) + i += 1 + size += (c & 0x7f) << s + s += 7 + # END character loop + # end performance at expense of maintenance ... 
+ return (type_id, size, i) + + +def create_pack_object_header(obj_type, obj_size): + """ + :return: string defining the pack header comprised of the object type + and its incompressed size in bytes + + :param obj_type: pack type_id of the object + :param obj_size: uncompressed size in bytes of the following object stream""" + c = 0 # 1 byte + if PY3: + hdr = bytearray() # output string + + c = (obj_type << 4) | (obj_size & 0xf) + obj_size >>= 4 + while obj_size: + hdr.append(c | 0x80) + c = obj_size & 0x7f + obj_size >>= 7 + # END until size is consumed + hdr.append(c) + else: + hdr = bytes() # output string + + c = (obj_type << 4) | (obj_size & 0xf) + obj_size >>= 4 + while obj_size: + hdr += chr(c | 0x80) + c = obj_size & 0x7f + obj_size >>= 7 + # END until size is consumed + hdr += chr(c) + # end handle interpreter + return hdr + + +def msb_size(data, offset=0): + """ + :return: tuple(read_bytes, size) read the msb size from the given random + access data starting at the given byte offset""" + size = 0 + i = 0 + l = len(data) + hit_msb = False + if PY3: + while i < l: + c = data[i + offset] + size |= (c & 0x7f) << i * 7 + i += 1 + if not c & 0x80: + hit_msb = True + break + # END check msb bit + # END while in range + else: + while i < l: + c = ord(data[i + offset]) + size |= (c & 0x7f) << i * 7 + i += 1 + if not c & 0x80: + hit_msb = True + break + # END check msb bit + # END while in range + # end performance ... + if not hit_msb: + raise AssertionError("Could not find terminating MSB byte in data stream") + return i + offset, size + + +def loose_object_header(type, size): + """ + :return: bytes representing the loose object header, which is immediately + followed by the content stream of size 'size'""" + return ('%s %i\0' % (force_text(type), size)).encode('ascii') + + +def write_object(type, size, read, write, chunk_size=chunk_size): + """ + Write the object as identified by type, size and source_stream into the + target_stream + + :param type: type string of the object + :param size: amount of bytes to write from source_stream + :param read: read method of a stream providing the content data + :param write: write method of the output stream + :param close_target_stream: if True, the target stream will be closed when + the routine exits, even if an error is thrown + :return: The actual amount of bytes written to stream, which includes the header and a trailing newline""" + tbw = 0 # total num bytes written + + # WRITE HEADER: type SP size NULL + tbw += write(loose_object_header(type, size)) + tbw += stream_copy(read, write, size, chunk_size) + + return tbw + + +def stream_copy(read, write, size, chunk_size): + """ + Copy a stream up to size bytes using the provided read and write methods, + in chunks of chunk_size + + **Note:** its much like stream_copy utility, but operates just using methods""" + dbw = 0 # num data bytes written + + # WRITE ALL DATA UP TO SIZE + while True: + cs = min(chunk_size, size - dbw) + # NOTE: not all write methods return the amount of written bytes, like + # mmap.write. Its bad, but we just deal with it ... 
perhaps its not + # even less efficient + # data_len = write(read(cs)) + # dbw += data_len + data = read(cs) + data_len = len(data) + dbw += data_len + write(data) + if data_len < cs or dbw == size: + break + # END check for stream end + # END duplicate data + return dbw + + +def connect_deltas(dstreams): + """ + Read the condensed delta chunk information from dstream and merge its information + into a list of existing delta chunks + + :param dstreams: iterable of delta stream objects, the delta to be applied last + comes first, then all its ancestors in order + :return: DeltaChunkList, containing all operations to apply""" + tdcl = None # topmost dcl + + dcl = tdcl = TopdownDeltaChunkList() + for dsi, ds in enumerate(dstreams): + # print "Stream", dsi + db = ds.read() + delta_buf_size = ds.size + + # read header + i, base_size = msb_size(db) + i, target_size = msb_size(db, i) + + # interpret opcodes + tbw = 0 # amount of target bytes written + while i < delta_buf_size: + c = ord(db[i]) + i += 1 + if c & 0x80: + cp_off, cp_size = 0, 0 + if (c & 0x01): + cp_off = ord(db[i]) + i += 1 + if (c & 0x02): + cp_off |= (ord(db[i]) << 8) + i += 1 + if (c & 0x04): + cp_off |= (ord(db[i]) << 16) + i += 1 + if (c & 0x08): + cp_off |= (ord(db[i]) << 24) + i += 1 + if (c & 0x10): + cp_size = ord(db[i]) + i += 1 + if (c & 0x20): + cp_size |= (ord(db[i]) << 8) + i += 1 + if (c & 0x40): + cp_size |= (ord(db[i]) << 16) + i += 1 + + if not cp_size: + cp_size = 0x10000 + + rbound = cp_off + cp_size + if (rbound < cp_size or + rbound > base_size): + break + + dcl.append(DeltaChunk(tbw, cp_size, cp_off, None)) + tbw += cp_size + elif c: + # NOTE: in C, the data chunks should probably be concatenated here. + # In python, we do it as a post-process + dcl.append(DeltaChunk(tbw, c, 0, db[i:i + c])) + i += c + tbw += c + else: + raise ValueError("unexpected delta opcode 0") + # END handle command byte + # END while processing delta data + + dcl.compress() + + # merge the lists ! 
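+        # Sketch of what the merge does (hypothetical numbers): a copy chunk of
+        # the newer delta such as DeltaChunk(to=0, ts=20, so=100, data=None) is
+        # replaced by connect_with_next_base() with the chunks of the older
+        # delta that produce bytes [100, 120) of *its* target, rebased by
+        # ofs = to - so == -100 so they now write to [0, 20) of the final target.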
+ if dsi > 0: + if not tdcl.connect_with_next_base(dcl): + break + # END handle merge + + # prepare next base + dcl = DeltaChunkList() + # END for each delta stream + + return tdcl + + +def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write): + """ + Apply data from a delta buffer using a source buffer to the target file + + :param src_buf: random access data from which the delta was created + :param src_buf_size: size of the source buffer in bytes + :param delta_buf_size: size fo the delta buffer in bytes + :param delta_buf: random access delta data + :param write: write method taking a chunk of bytes + + **Note:** transcribed to python from the similar routine in patch-delta.c""" + i = 0 + db = delta_buf + if PY3: + while i < delta_buf_size: + c = db[i] + i += 1 + if c & 0x80: + cp_off, cp_size = 0, 0 + if (c & 0x01): + cp_off = db[i] + i += 1 + if (c & 0x02): + cp_off |= (db[i] << 8) + i += 1 + if (c & 0x04): + cp_off |= (db[i] << 16) + i += 1 + if (c & 0x08): + cp_off |= (db[i] << 24) + i += 1 + if (c & 0x10): + cp_size = db[i] + i += 1 + if (c & 0x20): + cp_size |= (db[i] << 8) + i += 1 + if (c & 0x40): + cp_size |= (db[i] << 16) + i += 1 + + if not cp_size: + cp_size = 0x10000 + + rbound = cp_off + cp_size + if (rbound < cp_size or + rbound > src_buf_size): + break + write(buffer(src_buf, cp_off, cp_size)) + elif c: + write(db[i:i + c]) + i += c + else: + raise ValueError("unexpected delta opcode 0") + # END handle command byte + # END while processing delta data + else: + while i < delta_buf_size: + c = ord(db[i]) + i += 1 + if c & 0x80: + cp_off, cp_size = 0, 0 + if (c & 0x01): + cp_off = ord(db[i]) + i += 1 + if (c & 0x02): + cp_off |= (ord(db[i]) << 8) + i += 1 + if (c & 0x04): + cp_off |= (ord(db[i]) << 16) + i += 1 + if (c & 0x08): + cp_off |= (ord(db[i]) << 24) + i += 1 + if (c & 0x10): + cp_size = ord(db[i]) + i += 1 + if (c & 0x20): + cp_size |= (ord(db[i]) << 8) + i += 1 + if (c & 0x40): + cp_size |= (ord(db[i]) << 16) + i += 1 + + if not cp_size: + cp_size = 0x10000 + + rbound = cp_off + cp_size + if (rbound < cp_size or + rbound > src_buf_size): + break + write(buffer(src_buf, cp_off, cp_size)) + elif c: + write(db[i:i + c]) + i += c + else: + raise ValueError("unexpected delta opcode 0") + # END handle command byte + # END while processing delta data + # end save byte_ord call and prevent performance regression in py2 + + # yes, lets use the exact same error message that git uses :) + assert i == delta_buf_size, "delta replay has gone wild" + + +def is_equal_canonical_sha(canonical_length, match, sha1): + """ + :return: True if the given lhs and rhs 20 byte binary shas + The comparison will take the canonical_length of the match sha into account, + hence the comparison will only use the last 4 bytes for uneven canonical representations + :param match: less than 20 byte sha + :param sha1: 20 byte sha""" + binary_length = canonical_length // 2 + if match[:binary_length] != sha1[:binary_length]: + return False + + if canonical_length - binary_length and \ + (byte_ord(match[-1]) ^ byte_ord(sha1[len(match) - 1])) & 0xf0: + return False + # END handle uneven canonnical length + return True + +#} END routines + + +try: + from gitdb_speedups._perf import connect_deltas +except ImportError: + pass diff --git a/libs/gitdb/pack.py b/libs/gitdb/pack.py new file mode 100644 index 000000000..115d94365 --- /dev/null +++ b/libs/gitdb/pack.py @@ -0,0 +1,1033 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part 
of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains PackIndexFile and PackFile implementations""" +import zlib + +from gitdb.exc import ( + BadObject, + AmbiguousObjectName, + UnsupportedOperation, + ParseError +) + +from gitdb.util import ( + mman, + LazyMixin, + unpack_from, + bin_to_hex, + byte_ord, +) + +from gitdb.fun import ( + create_pack_object_header, + pack_object_header_info, + is_equal_canonical_sha, + type_id_to_type_map, + write_object, + stream_copy, + chunk_size, + delta_types, + OFS_DELTA, + REF_DELTA, + msb_size +) + +try: + from gitdb_speedups._perf import PackIndexFile_sha_to_index +except ImportError: + pass +# END try c module + +from gitdb.base import ( # Amazing ! + OInfo, + OStream, + OPackInfo, + OPackStream, + ODeltaStream, + ODeltaPackInfo, + ODeltaPackStream, +) + +from gitdb.stream import ( + DecompressMemMapReader, + DeltaApplyReader, + Sha1Writer, + NullStream, + FlexibleSha1Writer +) + +from struct import pack +from binascii import crc32 + +from gitdb.const import NULL_BYTE +from gitdb.utils.compat import ( + izip, + buffer, + xrange, + to_bytes +) + +import tempfile +import array +import os +import sys + +__all__ = ('PackIndexFile', 'PackFile', 'PackEntity') + + +#{ Utilities + +def pack_object_at(cursor, offset, as_stream): + """ + :return: Tuple(abs_data_offset, PackInfo|PackStream) + an object of the correct type according to the type_id of the object. + If as_stream is True, the object will contain a stream, allowing the + data to be read decompressed. + :param data: random accessible data containing all required information + :parma offset: offset in to the data at which the object information is located + :param as_stream: if True, a stream object will be returned that can read + the data, otherwise you receive an info object only""" + data = cursor.use_region(offset).buffer() + type_id, uncomp_size, data_rela_offset = pack_object_header_info(data) + total_rela_offset = None # set later, actual offset until data stream begins + delta_info = None + + # OFFSET DELTA + if type_id == OFS_DELTA: + i = data_rela_offset + c = byte_ord(data[i]) + i += 1 + delta_offset = c & 0x7f + while c & 0x80: + c = byte_ord(data[i]) + i += 1 + delta_offset += 1 + delta_offset = (delta_offset << 7) + (c & 0x7f) + # END character loop + delta_info = delta_offset + total_rela_offset = i + # REF DELTA + elif type_id == REF_DELTA: + total_rela_offset = data_rela_offset + 20 + delta_info = data[data_rela_offset:total_rela_offset] + # BASE OBJECT + else: + # assume its a base object + total_rela_offset = data_rela_offset + # END handle type id + abs_data_offset = offset + total_rela_offset + if as_stream: + stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size) + if delta_info is None: + return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream) + else: + return abs_data_offset, ODeltaPackStream(offset, type_id, uncomp_size, delta_info, stream) + else: + if delta_info is None: + return abs_data_offset, OPackInfo(offset, type_id, uncomp_size) + else: + return abs_data_offset, ODeltaPackInfo(offset, type_id, uncomp_size, delta_info) + # END handle info + # END handle stream + + +def write_stream_to_pack(read, write, zstream, base_crc=None): + """Copy a stream as read from read function, zip it, and write the result. 
+ Count the number of written bytes and return it + :param base_crc: if not None, the crc will be the base for all compressed data + we consecutively write and generate a crc32 from. If None, no crc will be generated + :return: tuple(no bytes read, no bytes written, crc32) crc might be 0 if base_crc + was false""" + br = 0 # bytes read + bw = 0 # bytes written + want_crc = base_crc is not None + crc = 0 + if want_crc: + crc = base_crc + # END initialize crc + + while True: + chunk = read(chunk_size) + br += len(chunk) + compressed = zstream.compress(chunk) + bw += len(compressed) + write(compressed) # cannot assume return value + + if want_crc: + crc = crc32(compressed, crc) + # END handle crc + + if len(chunk) != chunk_size: + break + # END copy loop + + compressed = zstream.flush() + bw += len(compressed) + write(compressed) + if want_crc: + crc = crc32(compressed, crc) + # END handle crc + + return (br, bw, crc) + + +#} END utilities + + +class IndexWriter(object): + + """Utility to cache index information, allowing to write all information later + in one go to the given stream + **Note:** currently only writes v2 indices""" + __slots__ = '_objs' + + def __init__(self): + self._objs = list() + + def append(self, binsha, crc, offset): + """Append one piece of object information""" + self._objs.append((binsha, crc, offset)) + + def write(self, pack_sha, write): + """Write the index file using the given write method + :param pack_sha: binary sha over the whole pack that we index + :return: sha1 binary sha over all index file contents""" + # sort for sha1 hash + self._objs.sort(key=lambda o: o[0]) + + sha_writer = FlexibleSha1Writer(write) + sha_write = sha_writer.write + sha_write(PackIndexFile.index_v2_signature) + sha_write(pack(">L", PackIndexFile.index_version_default)) + + # fanout + tmplist = list((0,) * 256) # fanout or list with 64 bit offsets + for t in self._objs: + tmplist[byte_ord(t[0][0])] += 1 + # END prepare fanout + for i in xrange(255): + v = tmplist[i] + sha_write(pack('>L', v)) + tmplist[i + 1] += v + # END write each fanout entry + sha_write(pack('>L', tmplist[255])) + + # sha1 ordered + # save calls, that is push them into c + sha_write(b''.join(t[0] for t in self._objs)) + + # crc32 + for t in self._objs: + sha_write(pack('>L', t[1] & 0xffffffff)) + # END for each crc + + tmplist = list() + # offset 32 + for t in self._objs: + ofs = t[2] + if ofs > 0x7fffffff: + tmplist.append(ofs) + ofs = 0x80000000 + len(tmplist) - 1 + # END hande 64 bit offsets + sha_write(pack('>L', ofs & 0xffffffff)) + # END for each offset + + # offset 64 + for ofs in tmplist: + sha_write(pack(">Q", ofs)) + # END for each offset + + # trailer + assert(len(pack_sha) == 20) + sha_write(pack_sha) + sha = sha_writer.sha(as_hex=False) + write(sha) + return sha + + +class PackIndexFile(LazyMixin): + + """A pack index provides offsets into the corresponding pack, allowing to find + locations for offsets faster.""" + + # Dont use slots as we dynamically bind functions for each version, need a dict for this + # The slots you see here are just to keep track of our instance variables + # __slots__ = ('_indexpath', '_fanout_table', '_cursor', '_version', + # '_sha_list_offset', '_crc_list_offset', '_pack_offset', '_pack_64_offset') + + # used in v2 indices + _sha_list_offset = 8 + 1024 + index_v2_signature = b'\xfftOc' + index_version_default = 2 + + def __init__(self, indexpath): + super(PackIndexFile, self).__init__() + self._indexpath = indexpath + + def close(self): + 
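+        """Close our memory map cursor and ask the memory manager to drop any
+        mapped handles for the index file (chiefly needed on Windows, where open
+        maps keep the file locked)."""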
mman.force_map_handle_removal_win(self._indexpath) + self._cursor = None + + def _set_cache_(self, attr): + if attr == "_packfile_checksum": + self._packfile_checksum = self._cursor.map()[-40:-20] + elif attr == "_packfile_checksum": + self._packfile_checksum = self._cursor.map()[-20:] + elif attr == "_cursor": + # Note: We don't lock the file when reading as we cannot be sure + # that we can actually write to the location - it could be a read-only + # alternate for instance + self._cursor = mman.make_cursor(self._indexpath).use_region() + # We will assume that the index will always fully fit into memory ! + if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size(): + raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % ( + self._indexpath, self._cursor.file_size(), mman.window_size())) + # END assert window size + else: + # now its time to initialize everything - if we are here, someone wants + # to access the fanout table or related properties + + # CHECK VERSION + mmap = self._cursor.map() + self._version = (mmap[:4] == self.index_v2_signature and 2) or 1 + if self._version == 2: + version_id = unpack_from(">L", mmap, 4)[0] + assert version_id == self._version, "Unsupported index version: %i" % version_id + # END assert version + + # SETUP FUNCTIONS + # setup our functions according to the actual version + for fname in ('entry', 'offset', 'sha', 'crc'): + setattr(self, fname, getattr(self, "_%s_v%i" % (fname, self._version))) + # END for each function to initialize + + # INITIALIZE DATA + # byte offset is 8 if version is 2, 0 otherwise + self._initialize() + # END handle attributes + + #{ Access V1 + + def _entry_v1(self, i): + """:return: tuple(offset, binsha, 0)""" + return unpack_from(">L20s", self._cursor.map(), 1024 + i * 24) + (0, ) + + def _offset_v1(self, i): + """see ``_offset_v2``""" + return unpack_from(">L", self._cursor.map(), 1024 + i * 24)[0] + + def _sha_v1(self, i): + """see ``_sha_v2``""" + base = 1024 + (i * 24) + 4 + return self._cursor.map()[base:base + 20] + + def _crc_v1(self, i): + """unsupported""" + return 0 + + #} END access V1 + + #{ Access V2 + def _entry_v2(self, i): + """:return: tuple(offset, binsha, crc)""" + return (self._offset_v2(i), self._sha_v2(i), self._crc_v2(i)) + + def _offset_v2(self, i): + """:return: 32 or 64 byte offset into pack files. 64 byte offsets will only + be returned if the pack is larger than 4 GiB, or 2^32""" + offset = unpack_from(">L", self._cursor.map(), self._pack_offset + i * 4)[0] + + # if the high-bit is set, this indicates that we have to lookup the offset + # in the 64 bit region of the file. 
The current offset ( lower 31 bits ) + # are the index into it + if offset & 0x80000000: + offset = unpack_from(">Q", self._cursor.map(), self._pack_64_offset + (offset & ~0x80000000) * 8)[0] + # END handle 64 bit offset + + return offset + + def _sha_v2(self, i): + """:return: sha at the given index of this file index instance""" + base = self._sha_list_offset + i * 20 + return self._cursor.map()[base:base + 20] + + def _crc_v2(self, i): + """:return: 4 bytes crc for the object at index i""" + return unpack_from(">L", self._cursor.map(), self._crc_list_offset + i * 4)[0] + + #} END access V2 + + #{ Initialization + + def _initialize(self): + """initialize base data""" + self._fanout_table = self._read_fanout((self._version == 2) * 8) + + if self._version == 2: + self._crc_list_offset = self._sha_list_offset + self.size() * 20 + self._pack_offset = self._crc_list_offset + self.size() * 4 + self._pack_64_offset = self._pack_offset + self.size() * 4 + # END setup base + + def _read_fanout(self, byte_offset): + """Generate a fanout table from our data""" + d = self._cursor.map() + out = list() + append = out.append + for i in xrange(256): + append(unpack_from('>L', d, byte_offset + i * 4)[0]) + # END for each entry + return out + + #} END initialization + + #{ Properties + def version(self): + return self._version + + def size(self): + """:return: amount of objects referred to by this index""" + return self._fanout_table[255] + + def path(self): + """:return: path to the packindexfile""" + return self._indexpath + + def packfile_checksum(self): + """:return: 20 byte sha representing the sha1 hash of the pack file""" + return self._cursor.map()[-40:-20] + + def indexfile_checksum(self): + """:return: 20 byte sha representing the sha1 hash of this index file""" + return self._cursor.map()[-20:] + + def offsets(self): + """:return: sequence of all offsets in the order in which they were written + + **Note:** return value can be random accessed, but may be immmutable""" + if self._version == 2: + # read stream to array, convert to tuple + a = array.array('I') # 4 byte unsigned int, long are 8 byte on 64 bit it appears + a.fromstring(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset)) + + # networkbyteorder to something array likes more + if sys.byteorder == 'little': + a.byteswap() + return a + else: + return tuple(self.offset(index) for index in xrange(self.size())) + # END handle version + + def sha_to_index(self, sha): + """ + :return: index usable with the ``offset`` or ``entry`` method, or None + if the sha was not found in this pack index + :param sha: 20 byte sha to lookup""" + first_byte = byte_ord(sha[0]) + get_sha = self.sha + lo = 0 # lower index, the left bound of the bisection + if first_byte != 0: + lo = self._fanout_table[first_byte - 1] + hi = self._fanout_table[first_byte] # the upper, right bound of the bisection + + # bisect until we have the sha + while lo < hi: + mid = (lo + hi) // 2 + mid_sha = get_sha(mid) + if sha < mid_sha: + hi = mid + elif sha == mid_sha: + return mid + else: + lo = mid + 1 + # END handle midpoint + # END bisect + return None + + def partial_sha_to_index(self, partial_bin_sha, canonical_length): + """ + :return: index as in `sha_to_index` or None if the sha was not found in this + index file + :param partial_bin_sha: an at least two bytes of a partial binary sha as bytes + :param canonical_length: length of the original hexadecimal representation of the + given partial binary sha + :raise AmbiguousObjectName:""" + if 
len(partial_bin_sha) < 2: + raise ValueError("Require at least 2 bytes of partial sha") + + assert isinstance(partial_bin_sha, bytes), "partial_bin_sha must be bytes" + first_byte = byte_ord(partial_bin_sha[0]) + + get_sha = self.sha + lo = 0 # lower index, the left bound of the bisection + if first_byte != 0: + lo = self._fanout_table[first_byte - 1] + hi = self._fanout_table[first_byte] # the upper, right bound of the bisection + + # fill the partial to full 20 bytes + filled_sha = partial_bin_sha + NULL_BYTE * (20 - len(partial_bin_sha)) + + # find lowest + while lo < hi: + mid = (lo + hi) // 2 + mid_sha = get_sha(mid) + if filled_sha < mid_sha: + hi = mid + elif filled_sha == mid_sha: + # perfect match + lo = mid + break + else: + lo = mid + 1 + # END handle midpoint + # END bisect + + if lo < self.size(): + cur_sha = get_sha(lo) + if is_equal_canonical_sha(canonical_length, partial_bin_sha, cur_sha): + next_sha = None + if lo + 1 < self.size(): + next_sha = get_sha(lo + 1) + if next_sha and next_sha == cur_sha: + raise AmbiguousObjectName(partial_bin_sha) + return lo + # END if we have a match + # END if we found something + return None + + if 'PackIndexFile_sha_to_index' in globals(): + # NOTE: Its just about 25% faster, the major bottleneck might be the attr + # accesses + def sha_to_index(self, sha): + return PackIndexFile_sha_to_index(self, sha) + # END redefine heavy-hitter with c version + + #} END properties + + +class PackFile(LazyMixin): + + """A pack is a file written according to the Version 2 for git packs + + As we currently use memory maps, it could be assumed that the maximum size of + packs therefor is 32 bit on 32 bit systems. On 64 bit systems, this should be + fine though. + + **Note:** at some point, this might be implemented using streams as well, or + streams are an alternate path in the case memory maps cannot be created + for some reason - one clearly doesn't want to read 10GB at once in that + case""" + + __slots__ = ('_packpath', '_cursor', '_size', '_version') + pack_signature = 0x5041434b # 'PACK' + pack_version_default = 2 + + # offset into our data at which the first object starts + first_object_offset = 3 * 4 # header bytes + footer_size = 20 # final sha + + def __init__(self, packpath): + self._packpath = packpath + + def close(self): + mman.force_map_handle_removal_win(self._packpath) + self._cursor = None + + def _set_cache_(self, attr): + # we fill the whole cache, whichever attribute gets queried first + self._cursor = mman.make_cursor(self._packpath).use_region() + + # read the header information + type_id, self._version, self._size = unpack_from(">LLL", self._cursor.map(), 0) + + # TODO: figure out whether we should better keep the lock, or maybe + # add a .keep file instead ? 
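+
+        # For reference, the 12 byte pack header is just b'PACK', a version and the
+        # object count, each a big-endian 32 bit integer. With invented values:
+        #   unpack_from(">LLL", b'PACK\x00\x00\x00\x02\x00\x00\x00\x2a', 0)
+        #   == (0x5041434b, 2, 42)   # 'PACK' signature, version 2, 42 objects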
+ if type_id != self.pack_signature: + raise ParseError("Invalid pack signature: %i" % type_id) + + def _iter_objects(self, start_offset, as_stream=True): + """Handle the actual iteration of objects within this pack""" + c = self._cursor + content_size = c.file_size() - self.footer_size + cur_offset = start_offset or self.first_object_offset + + null = NullStream() + while cur_offset < content_size: + data_offset, ostream = pack_object_at(c, cur_offset, True) + # scrub the stream to the end - this decompresses the object, but yields + # the amount of compressed bytes we need to get to the next offset + + stream_copy(ostream.read, null.write, ostream.size, chunk_size) + assert ostream.stream._br == ostream.size + cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read() + + # if a stream is requested, reset it beforehand + # Otherwise return the Stream object directly, its derived from the + # info object + if as_stream: + ostream.stream.seek(0) + yield ostream + # END until we have read everything + + #{ Pack Information + + def size(self): + """:return: The amount of objects stored in this pack""" + return self._size + + def version(self): + """:return: the version of this pack""" + return self._version + + def data(self): + """ + :return: read-only data of this pack. It provides random access and usually + is a memory map. + :note: This method is unsafe as it returns a window into a file which might be larger than than the actual window size""" + # can use map as we are starting at offset 0. Otherwise we would have to use buffer() + return self._cursor.use_region().map() + + def checksum(self): + """:return: 20 byte sha1 hash on all object sha's contained in this file""" + return self._cursor.use_region(self._cursor.file_size() - 20).buffer()[:] + + def path(self): + """:return: path to the packfile""" + return self._packpath + #} END pack information + + #{ Pack Specific + + def collect_streams(self, offset): + """ + :return: list of pack streams which are required to build the object + at the given offset. The first entry of the list is the object at offset, + the last one is either a full object, or a REF_Delta stream. The latter + type needs its reference object to be locked up in an ODB to form a valid + delta chain. + If the object at offset is no delta, the size of the list is 1. + :param offset: specifies the first byte of the object within this pack""" + out = list() + c = self._cursor + while True: + ostream = pack_object_at(c, offset, True)[1] + out.append(ostream) + if ostream.type_id == OFS_DELTA: + offset = ostream.pack_offset - ostream.delta_info + else: + # the only thing we can lookup are OFFSET deltas. 
Everything + # else is either an object, or a ref delta, in the latter + # case someone else has to find it + break + # END handle type + # END while chaining streams + return out + + #} END pack specific + + #{ Read-Database like Interface + + def info(self, offset): + """Retrieve information about the object at the given file-absolute offset + + :param offset: byte offset + :return: OPackInfo instance, the actual type differs depending on the type_id attribute""" + return pack_object_at(self._cursor, offset or self.first_object_offset, False)[1] + + def stream(self, offset): + """Retrieve an object at the given file-relative offset as stream along with its information + + :param offset: byte offset + :return: OPackStream instance, the actual type differs depending on the type_id attribute""" + return pack_object_at(self._cursor, offset or self.first_object_offset, True)[1] + + def stream_iter(self, start_offset=0): + """ + :return: iterator yielding OPackStream compatible instances, allowing + to access the data in the pack directly. + :param start_offset: offset to the first object to iterate. If 0, iteration + starts at the very first object in the pack. + + **Note:** Iterating a pack directly is costly as the datastream has to be decompressed + to determine the bounds between the objects""" + return self._iter_objects(start_offset, as_stream=True) + + #} END Read-Database like Interface + + +class PackEntity(LazyMixin): + + """Combines the PackIndexFile and the PackFile into one, allowing the + actual objects to be resolved and iterated""" + + __slots__ = ('_index', # our index file + '_pack', # our pack file + '_offset_map' # on demand dict mapping one offset to the next consecutive one + ) + + IndexFileCls = PackIndexFile + PackFileCls = PackFile + + def __init__(self, pack_or_index_path): + """Initialize ourselves with the path to the respective pack or index file""" + basename, ext = os.path.splitext(pack_or_index_path) + self._index = self.IndexFileCls("%s.idx" % basename) # PackIndexFile instance + self._pack = self.PackFileCls("%s.pack" % basename) # corresponding PackFile instance + + def close(self): + self._index.close() + self._pack.close() + + def _set_cache_(self, attr): + # currently this can only be _offset_map + # TODO: make this a simple sorted offset array which can be bisected + # to find the respective entry, from which we can take a +1 easily + # This might be slower, but should also be much lighter in memory ! 
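+        # What gets built here, with made-up numbers: for sorted offsets
+        # [12, 190, 310] in a 4000 byte pack, the map becomes
+        # {12: 190, 190: 310, 310: 3980} - each entry points at the first byte of
+        # the next object, 3980 being the file size minus the 20 byte trailing sha.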
+ offsets_sorted = sorted(self._index.offsets()) + last_offset = len(self._pack.data()) - self._pack.footer_size + assert offsets_sorted, "Cannot handle empty indices" + + offset_map = None + if len(offsets_sorted) == 1: + offset_map = {offsets_sorted[0]: last_offset} + else: + iter_offsets = iter(offsets_sorted) + iter_offsets_plus_one = iter(offsets_sorted) + next(iter_offsets_plus_one) + consecutive = izip(iter_offsets, iter_offsets_plus_one) + + offset_map = dict(consecutive) + + # the last offset is not yet set + offset_map[offsets_sorted[-1]] = last_offset + # END handle offset amount + self._offset_map = offset_map + + def _sha_to_index(self, sha): + """:return: index for the given sha, or raise""" + index = self._index.sha_to_index(sha) + if index is None: + raise BadObject(sha) + return index + + def _iter_objects(self, as_stream): + """Iterate over all objects in our index and yield their OInfo or OStream instences""" + _sha = self._index.sha + _object = self._object + for index in xrange(self._index.size()): + yield _object(_sha(index), as_stream, index) + # END for each index + + def _object(self, sha, as_stream, index=-1): + """:return: OInfo or OStream object providing information about the given sha + :param index: if not -1, its assumed to be the sha's index in the IndexFile""" + # its a little bit redundant here, but it needs to be efficient + if index < 0: + index = self._sha_to_index(sha) + if sha is None: + sha = self._index.sha(index) + # END assure sha is present ( in output ) + offset = self._index.offset(index) + type_id, uncomp_size, data_rela_offset = pack_object_header_info(self._pack._cursor.use_region(offset).buffer()) + if as_stream: + if type_id not in delta_types: + packstream = self._pack.stream(offset) + return OStream(sha, packstream.type, packstream.size, packstream.stream) + # END handle non-deltas + + # produce a delta stream containing all info + # To prevent it from applying the deltas when querying the size, + # we extract it from the delta stream ourselves + streams = self.collect_streams_at_offset(offset) + dstream = DeltaApplyReader.new(streams) + + return ODeltaStream(sha, dstream.type, None, dstream) + else: + if type_id not in delta_types: + return OInfo(sha, type_id_to_type_map[type_id], uncomp_size) + # END handle non-deltas + + # deltas are a little tougher - unpack the first bytes to obtain + # the actual target size, as opposed to the size of the delta data + streams = self.collect_streams_at_offset(offset) + buf = streams[0].read(512) + offset, src_size = msb_size(buf) + offset, target_size = msb_size(buf, offset) + + # collect the streams to obtain the actual object type + if streams[-1].type_id in delta_types: + raise BadObject(sha, "Could not resolve delta object") + return OInfo(sha, streams[-1].type, target_size) + # END handle stream + + #{ Read-Database like Interface + + def info(self, sha): + """Retrieve information about the object identified by the given sha + + :param sha: 20 byte sha1 + :raise BadObject: + :return: OInfo instance, with 20 byte sha""" + return self._object(sha, False) + + def stream(self, sha): + """Retrieve an object stream along with its information as identified by the given sha + + :param sha: 20 byte sha1 + :raise BadObject: + :return: OStream instance, with 20 byte sha""" + return self._object(sha, True) + + def info_at_index(self, index): + """As ``info``, but uses a PackIndexFile compatible index to refer to the object""" + return self._object(None, False, index) + + def stream_at_index(self, index): 
+ """As ``stream``, but uses a PackIndexFile compatible index to refer to the + object""" + return self._object(None, True, index) + + #} END Read-Database like Interface + + #{ Interface + + def pack(self): + """:return: the underlying pack file instance""" + return self._pack + + def index(self): + """:return: the underlying pack index file instance""" + return self._index + + def is_valid_stream(self, sha, use_crc=False): + """ + Verify that the stream at the given sha is valid. + + :param use_crc: if True, the index' crc is run over the compressed stream of + the object, which is much faster than checking the sha1. It is also + more prone to unnoticed corruption or manipulation. + :param sha: 20 byte sha1 of the object whose stream to verify + whether the compressed stream of the object is valid. If it is + a delta, this only verifies that the delta's data is valid, not the + data of the actual undeltified object, as it depends on more than + just this stream. + If False, the object will be decompressed and the sha generated. It must + match the given sha + + :return: True if the stream is valid + :raise UnsupportedOperation: If the index is version 1 only + :raise BadObject: sha was not found""" + if use_crc: + if self._index.version() < 2: + raise UnsupportedOperation("Version 1 indices do not contain crc's, verify by sha instead") + # END handle index version + + index = self._sha_to_index(sha) + offset = self._index.offset(index) + next_offset = self._offset_map[offset] + crc_value = self._index.crc(index) + + # create the current crc value, on the compressed object data + # Read it in chunks, without copying the data + crc_update = zlib.crc32 + pack_data = self._pack.data() + cur_pos = offset + this_crc_value = 0 + while cur_pos < next_offset: + rbound = min(cur_pos + chunk_size, next_offset) + size = rbound - cur_pos + this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value) + cur_pos += size + # END window size loop + + # crc returns signed 32 bit numbers, the AND op forces it into unsigned + # mode ... wow, sneaky, from dulwich. + return (this_crc_value & 0xffffffff) == crc_value + else: + shawriter = Sha1Writer() + stream = self._object(sha, as_stream=True) + # write a loose object, which is the basis for the sha + write_object(stream.type, stream.size, stream.read, shawriter.write) + + assert shawriter.sha(as_hex=False) == sha + return shawriter.sha(as_hex=False) == sha + # END handle crc/sha verification + return True + + def info_iter(self): + """ + :return: Iterator over all objects in this pack. The iterator yields + OInfo instances""" + return self._iter_objects(as_stream=False) + + def stream_iter(self): + """ + :return: iterator over all objects in this pack. The iterator yields + OStream instances""" + return self._iter_objects(as_stream=True) + + def collect_streams_at_offset(self, offset): + """ + As the version in the PackFile, but can resolve REF deltas within this pack + For more info, see ``collect_streams`` + + :param offset: offset into the pack file at which the object can be found""" + streams = self._pack.collect_streams(offset) + + # try to resolve the last one if needed. 
It is assumed to be either + # a REF delta, or a base object, as OFFSET deltas are resolved by the pack + if streams[-1].type_id == REF_DELTA: + stream = streams[-1] + while stream.type_id in delta_types: + if stream.type_id == REF_DELTA: + sindex = self._index.sha_to_index(to_bytes(stream.delta_info)) + if sindex is None: + break + stream = self._pack.stream(self._index.offset(sindex)) + streams.append(stream) + else: + # must be another OFS DELTA - this could happen if a REF + # delta we resolve previously points to an OFS delta. Who + # would do that ;) ? We can handle it though + stream = self._pack.stream(stream.delta_info) + streams.append(stream) + # END handle ref delta + # END resolve ref streams + # END resolve streams + + return streams + + def collect_streams(self, sha): + """ + As ``PackFile.collect_streams``, but takes a sha instead of an offset. + Additionally, ref_delta streams will be resolved within this pack. + If this is not possible, the stream will be left alone, hence it is adivsed + to check for unresolved ref-deltas and resolve them before attempting to + construct a delta stream. + + :param sha: 20 byte sha1 specifying the object whose related streams you want to collect + :return: list of streams, first being the actual object delta, the last being + a possibly unresolved base object. + :raise BadObject:""" + return self.collect_streams_at_offset(self._index.offset(self._sha_to_index(sha))) + + @classmethod + def write_pack(cls, object_iter, pack_write, index_write=None, + object_count=None, zlib_compression=zlib.Z_BEST_SPEED): + """ + Create a new pack by putting all objects obtained by the object_iterator + into a pack which is written using the pack_write method. + The respective index is produced as well if index_write is not Non. + + :param object_iter: iterator yielding odb output objects + :param pack_write: function to receive strings to write into the pack stream + :param indx_write: if not None, the function writes the index file corresponding + to the pack. + :param object_count: if you can provide the amount of objects in your iteration, + this would be the place to put it. Otherwise we have to pre-iterate and store + all items into a list to get the number, which uses more memory than necessary. + :param zlib_compression: the zlib compression level to use + :return: tuple(pack_sha, index_binsha) binary sha over all the contents of the pack + and over all contents of the index. If index_write was None, index_binsha will be None + + **Note:** The destination of the write functions is up to the user. 
It could + be a socket, or a file for instance + + **Note:** writes only undeltified objects""" + objs = object_iter + if not object_count: + if not isinstance(object_iter, (tuple, list)): + objs = list(object_iter) + # END handle list type + object_count = len(objs) + # END handle object + + pack_writer = FlexibleSha1Writer(pack_write) + pwrite = pack_writer.write + ofs = 0 # current offset into the pack file + index = None + wants_index = index_write is not None + + # write header + pwrite(pack('>LLL', PackFile.pack_signature, PackFile.pack_version_default, object_count)) + ofs += 12 + + if wants_index: + index = IndexWriter() + # END handle index header + + actual_count = 0 + for obj in objs: + actual_count += 1 + crc = 0 + + # object header + hdr = create_pack_object_header(obj.type_id, obj.size) + if index_write: + crc = crc32(hdr) + else: + crc = None + # END handle crc + pwrite(hdr) + + # data stream + zstream = zlib.compressobj(zlib_compression) + ostream = obj.stream + br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc=crc) + assert(br == obj.size) + if wants_index: + index.append(obj.binsha, crc, ofs) + # END handle index + + ofs += len(hdr) + bw + if actual_count == object_count: + break + # END abort once we are done + # END for each object + + if actual_count != object_count: + raise ValueError( + "Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count)) + # END count assertion + + # write footer + pack_sha = pack_writer.sha(as_hex=False) + assert len(pack_sha) == 20 + pack_write(pack_sha) + ofs += len(pack_sha) # just for completeness ;) + + index_sha = None + if wants_index: + index_sha = index.write(pack_sha, index_write) + # END handle index + + return pack_sha, index_sha + + @classmethod + def create(cls, object_iter, base_dir, object_count=None, zlib_compression=zlib.Z_BEST_SPEED): + """Create a new on-disk entity comprised of a properly named pack file and a properly named + and corresponding index file. The pack contains all OStream objects contained in object iter. 
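+
+        A usage sketch only - ``object_streams`` stands in for a list of
+        undeltified OStream objects you already have::
+
+            entity = PackEntity.create(object_streams, '/tmp/packs',
+                                       object_count=len(object_streams))
+            assert entity.index().size() == len(object_streams)
+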
+ :param base_dir: directory which is to contain the files + :return: PackEntity instance initialized with the new pack + + **Note:** for more information on the other parameters see the write_pack method""" + pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir) + index_fd, index_path = tempfile.mkstemp('', 'index', base_dir) + pack_write = lambda d: os.write(pack_fd, d) + index_write = lambda d: os.write(index_fd, d) + + pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write, index_write, object_count, zlib_compression) + os.close(pack_fd) + os.close(index_fd) + + fmt = "pack-%s.%s" + new_pack_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'pack')) + new_index_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'idx')) + os.rename(pack_path, new_pack_path) + os.rename(index_path, new_index_path) + + return cls(new_pack_path) + + #} END interface diff --git a/libs/gitdb/stream.py b/libs/gitdb/stream.py new file mode 100644 index 000000000..2f4c12dfb --- /dev/null +++ b/libs/gitdb/stream.py @@ -0,0 +1,732 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php + +from io import BytesIO + +import mmap +import os +import sys +import zlib + +from gitdb.fun import ( + msb_size, + stream_copy, + apply_delta_data, + connect_deltas, + delta_types +) + +from gitdb.util import ( + allocate_memory, + LazyMixin, + make_sha, + write, + close, +) + +from gitdb.const import NULL_BYTE, BYTE_SPACE +from gitdb.utils.compat import buffer +from gitdb.utils.encoding import force_bytes + +has_perf_mod = False +PY26 = sys.version_info[:2] < (2, 7) +try: + from gitdb_speedups._perf import apply_delta as c_apply_delta + has_perf_mod = True +except ImportError: + pass + +__all__ = ('DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader', + 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', 'FDCompressedSha1Writer', + 'FDStream', 'NullStream') + + +#{ RO Streams + +class DecompressMemMapReader(LazyMixin): + + """Reads data in chunks from a memory map and decompresses it. The client sees + only the uncompressed data, respective file-like read calls are handling on-demand + buffered decompression accordingly + + A constraint on the total size of bytes is activated, simulating + a logical file within a possibly larger physical memory area + + To read efficiently, you clearly don't want to read individual bytes, instead, + read a few kilobytes at least. + + **Note:** The chunk-size should be carefully selected as it will involve quite a bit + of string copying due to the way the zlib is implemented. Its very wasteful, + hence we try to find a good tradeoff between allocation time and number of + times we actually allocate. An own zlib implementation would be good here + to better support streamed reading - it would only need to keep the mmap + and decompress it into chunks, that's all ... 
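+
+    A rough usage sketch, assuming ``raw`` is a buffer or memory map holding a
+    zlib-compressed loose object (header plus content)::
+
+        type_name, size, stream = DecompressMemMapReader.new(raw)
+        content = stream.read(size)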
""" + __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close', + '_cbr', '_phi') + + max_read_size = 512 * 1024 # currently unused + + def __init__(self, m, close_on_deletion, size=None): + """Initialize with mmap for stream reading + :param m: must be content data - use new if you have object data and no size""" + self._m = m + self._zip = zlib.decompressobj() + self._buf = None # buffer of decompressed bytes + self._buflen = 0 # length of bytes in buffer + if size is not None: + self._s = size # size of uncompressed data to read in total + self._br = 0 # num uncompressed bytes read + self._cws = 0 # start byte of compression window + self._cwe = 0 # end byte of compression window + self._cbr = 0 # number of compressed bytes read + self._phi = False # is True if we parsed the header info + self._close = close_on_deletion # close the memmap on deletion ? + + def _set_cache_(self, attr): + assert attr == '_s' + # only happens for size, which is a marker to indicate we still + # have to parse the header from the stream + self._parse_header_info() + + def __del__(self): + self.close() + + def _parse_header_info(self): + """If this stream contains object data, parse the header info and skip the + stream to a point where each read will yield object content + + :return: parsed type_string, size""" + # read header + # should really be enough, cgit uses 8192 I believe + # And for good reason !! This needs to be that high for the header to be read correctly in all cases + maxb = 8192 + self._s = maxb + hdr = self.read(maxb) + hdrend = hdr.find(NULL_BYTE) + typ, size = hdr[:hdrend].split(BYTE_SPACE) + size = int(size) + self._s = size + + # adjust internal state to match actual header length that we ignore + # The buffer will be depleted first on future reads + self._br = 0 + hdrend += 1 + self._buf = BytesIO(hdr[hdrend:]) + self._buflen = len(hdr) - hdrend + + self._phi = True + + return typ, size + + #{ Interface + + @classmethod + def new(self, m, close_on_deletion=False): + """Create a new DecompressMemMapReader instance for acting as a read-only stream + This method parses the object header from m and returns the parsed + type and size, as well as the created stream instance. + + :param m: memory map on which to operate. It must be object data ( header + contents ) + :param close_on_deletion: if True, the memory map will be closed once we are + being deleted""" + inst = DecompressMemMapReader(m, close_on_deletion, 0) + typ, size = inst._parse_header_info() + return typ, size, inst + + def data(self): + """:return: random access compatible data we are working on""" + return self._m + + def close(self): + """Close our underlying stream of compressed bytes if this was allowed during initialization + :return: True if we closed the underlying stream + :note: can be called safely + """ + if self._close: + if hasattr(self._m, 'close'): + self._m.close() + self._close = False + # END handle resource freeing + + def compressed_bytes_read(self): + """ + :return: number of compressed bytes read. This includes the bytes it + took to decompress the header ( if there was one )""" + # ABSTRACT: When decompressing a byte stream, it can be that the first + # x bytes which were requested match the first x bytes in the loosely + # compressed datastream. This is the worst-case assumption that the reader + # does, it assumes that it will get at least X bytes from X compressed bytes + # in call cases. 
+ # The caveat is that the object, according to our known uncompressed size, + # is already complete, but there are still some bytes left in the compressed + # stream that contribute to the amount of compressed bytes. + # How can we know that we are truly done, and have read all bytes we need + # to read ? + # Without help, we cannot know, as we need to obtain the status of the + # decompression. If it is not finished, we need to decompress more data + # until it is finished, to yield the actual number of compressed bytes + # belonging to the decompressed object + # We are using a custom zlib module for this, if its not present, + # we try to put in additional bytes up for decompression if feasible + # and check for the unused_data. + + # Only scrub the stream forward if we are officially done with the + # bytes we were to have. + if self._br == self._s and not self._zip.unused_data: + # manipulate the bytes-read to allow our own read method to continue + # but keep the window at its current position + self._br = 0 + if hasattr(self._zip, 'status'): + while self._zip.status == zlib.Z_OK: + self.read(mmap.PAGESIZE) + # END scrub-loop custom zlib + else: + # pass in additional pages, until we have unused data + while not self._zip.unused_data and self._cbr != len(self._m): + self.read(mmap.PAGESIZE) + # END scrub-loop default zlib + # END handle stream scrubbing + + # reset bytes read, just to be sure + self._br = self._s + # END handle stream scrubbing + + # unused data ends up in the unconsumed tail, which was removed + # from the count already + return self._cbr + + #} END interface + + def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)): + """Allows to reset the stream to restart reading + :raise ValueError: If offset and whence are not 0""" + if offset != 0 or whence != getattr(os, 'SEEK_SET', 0): + raise ValueError("Can only seek to position 0") + # END handle offset + + self._zip = zlib.decompressobj() + self._br = self._cws = self._cwe = self._cbr = 0 + if self._phi: + self._phi = False + del(self._s) # trigger header parsing on first access + # END skip header + + def read(self, size=-1): + if size < 1: + size = self._s - self._br + else: + size = min(size, self._s - self._br) + # END clamp size + + if size == 0: + return bytes() + # END handle depletion + + # deplete the buffer, then just continue using the decompress object + # which has an own buffer. We just need this to transparently parse the + # header from the zlib stream + dat = bytes() + if self._buf: + if self._buflen >= size: + # have enough data + dat = self._buf.read(size) + self._buflen -= size + self._br += size + return dat + else: + dat = self._buf.read() # ouch, duplicates data + size -= self._buflen + self._br += self._buflen + + self._buflen = 0 + self._buf = None + # END handle buffer len + # END handle buffer + + # decompress some data + # Abstract: zlib needs to operate on chunks of our memory map ( which may + # be large ), as it will otherwise and always fill in the 'unconsumed_tail' + # attribute which possible reads our whole map to the end, forcing + # everything to be read from disk even though just a portion was requested. + # As this would be a nogo, we workaround it by passing only chunks of data, + # moving the window into the memory map along as we decompress, which keeps + # the tail smaller than our chunk-size. This causes 'only' the chunk to be + # copied once, and another copy of a part of it when it creates the unconsumed + # tail. 
We have to use it to hand in the appropriate amount of bytes during + # the next read. + tail = self._zip.unconsumed_tail + if tail: + # move the window, make it as large as size demands. For code-clarity, + # we just take the chunk from our map again instead of reusing the unconsumed + # tail. The latter one would safe some memory copying, but we could end up + # with not getting enough data uncompressed, so we had to sort that out as well. + # Now we just assume the worst case, hence the data is uncompressed and the window + # needs to be as large as the uncompressed bytes we want to read. + self._cws = self._cwe - len(tail) + self._cwe = self._cws + size + else: + cws = self._cws + self._cws = self._cwe + self._cwe = cws + size + # END handle tail + + # if window is too small, make it larger so zip can decompress something + if self._cwe - self._cws < 8: + self._cwe = self._cws + 8 + # END adjust winsize + + # takes a slice, but doesn't copy the data, it says ... + indata = buffer(self._m, self._cws, self._cwe - self._cws) + + # get the actual window end to be sure we don't use it for computations + self._cwe = self._cws + len(indata) + dcompdat = self._zip.decompress(indata, size) + # update the amount of compressed bytes read + # We feed possibly overlapping chunks, which is why the unconsumed tail + # has to be taken into consideration, as well as the unused data + # if we hit the end of the stream + # NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly. + # They are thorough, and I assume it is truly working. + # Why is this logic as convoluted as it is ? Please look at the table in + # https://github.com/gitpython-developers/gitdb/issues/19 to learn about the test-results. + # Bascially, on py2.6, you want to use branch 1, whereas on all other python version, the second branch + # will be the one that works. + # However, the zlib VERSIONs as well as the platform check is used to further match the entries in the + # table in the github issue. This is it ... it was the only way I could make this work everywhere. + # IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... . + if PY26 or ((zlib.ZLIB_VERSION == '1.2.7' or zlib.ZLIB_VERSION == '1.2.5') and not sys.platform == 'darwin'): + unused_datalen = len(self._zip.unconsumed_tail) + else: + unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data) + # # end handle very special case ... + + self._cbr += len(indata) - unused_datalen + self._br += len(dcompdat) + + if dat: + dcompdat = dat + dcompdat + # END prepend our cached data + + # it can happen, depending on the compression, that we get less bytes + # than ordered as it needs the final portion of the data as well. + # Recursively resolve that. + # Note: dcompdat can be empty even though we still appear to have bytes + # to read, if we are called by compressed_bytes_read - it manipulates + # us to empty the stream + if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s: + dcompdat += self.read(size - len(dcompdat)) + # END handle special case + return dcompdat + + +class DeltaApplyReader(LazyMixin): + + """A reader which dynamically applies pack deltas to a base object, keeping the + memory demands to a minimum. + + The size of the final object is only obtainable once all deltas have been + applied, unless it is retrieved from a pack index. 
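+
+    A usage sketch, assuming ``streams`` is a list such as the one returned by
+    ``PackEntity.collect_streams`` (deltas first, a fully resolved base object
+    last)::
+
+        reader = DeltaApplyReader.new(streams)
+        data = reader.read(reader.size)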
+ + The uncompressed Delta has the following layout (MSB being a most significant + bit encoded dynamic size): + + * MSB Source Size - the size of the base against which the delta was created + * MSB Target Size - the size of the resulting data after the delta was applied + * A list of one byte commands (cmd) which are followed by a specific protocol: + + * cmd & 0x80 - copy delta_data[offset:offset+size] + + * Followed by an encoded offset into the delta data + * Followed by an encoded size of the chunk to copy + + * cmd & 0x7f - insert + + * insert cmd bytes from the delta buffer into the output stream + + * cmd == 0 - invalid operation ( or error in delta stream ) + """ + __slots__ = ( + "_bstream", # base stream to which to apply the deltas + "_dstreams", # tuple of delta stream readers + "_mm_target", # memory map of the delta-applied data + "_size", # actual number of bytes in _mm_target + "_br" # number of bytes read + ) + + #{ Configuration + k_max_memory_move = 250 * 1000 * 1000 + #} END configuration + + def __init__(self, stream_list): + """Initialize this instance with a list of streams, the first stream being + the delta to apply on top of all following deltas, the last stream being the + base object onto which to apply the deltas""" + assert len(stream_list) > 1, "Need at least one delta and one base stream" + + self._bstream = stream_list[-1] + self._dstreams = tuple(stream_list[:-1]) + self._br = 0 + + def _set_cache_too_slow_without_c(self, attr): + # the direct algorithm is fastest and most direct if there is only one + # delta. Also, the extra overhead might not be worth it for items smaller + # than X - definitely the case in python, every function call costs + # huge amounts of time + # if len(self._dstreams) * self._bstream.size < self.k_max_memory_move: + if len(self._dstreams) == 1: + return self._set_cache_brute_(attr) + + # Aggregate all deltas into one delta in reverse order. Hence we take + # the last delta, and reverse-merge its ancestor delta, until we receive + # the final delta data stream. + dcl = connect_deltas(self._dstreams) + + # call len directly, as the (optional) c version doesn't implement the sequence + # protocol + if dcl.rbound() == 0: + self._size = 0 + self._mm_target = allocate_memory(0) + return + # END handle empty list + + self._size = dcl.rbound() + self._mm_target = allocate_memory(self._size) + + bbuf = allocate_memory(self._bstream.size) + stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE) + + # APPLY CHUNKS + write = self._mm_target.write + dcl.apply(bbuf, write) + + self._mm_target.seek(0) + + def _set_cache_brute_(self, attr): + """If we are here, we apply the actual deltas""" + # TODO: There should be a special case if there is only one stream + # Then the default-git algorithm should perform a tad faster, as the + # delta is not peaked into, causing less overhead. 
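+
+        # The two leading varints of each delta are decoded by msb_size(); as an
+        # arbitrary example, msb_size(b'\x90\x01') == (2, 144): the low 7 bits of
+        # 0x90 give 16, its set MSB pulls in 0x01 which adds 1 << 7, so this first
+        # varint decodes to 144 and parsing continues at offset 2, where the
+        # target-size varint follows.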
+ buffer_info_list = list() + max_target_size = 0 + for dstream in self._dstreams: + buf = dstream.read(512) # read the header information + X + offset, src_size = msb_size(buf) + offset, target_size = msb_size(buf, offset) + buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size)) + max_target_size = max(max_target_size, target_size) + # END for each delta stream + + # sanity check - the first delta to apply should have the same source + # size as our actual base stream + base_size = self._bstream.size + target_size = max_target_size + + # if we have more than 1 delta to apply, we will swap buffers, hence we must + # assure that all buffers we use are large enough to hold all the results + if len(self._dstreams) > 1: + base_size = target_size = max(base_size, max_target_size) + # END adjust buffer sizes + + # Allocate private memory map big enough to hold the first base buffer + # We need random access to it + bbuf = allocate_memory(base_size) + stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE) + + # allocate memory map large enough for the largest (intermediate) target + # We will use it as scratch space for all delta ops. If the final + # target buffer is smaller than our allocated space, we just use parts + # of it upon return. + tbuf = allocate_memory(target_size) + + # for each delta to apply, memory map the decompressed delta and + # work on the op-codes to reconstruct everything. + # For the actual copying, we use a seek and write pattern of buffer + # slices. + final_target_size = None + for (dbuf, offset, src_size, target_size), dstream in zip(reversed(buffer_info_list), reversed(self._dstreams)): + # allocate a buffer to hold all delta data - fill in the data for + # fast access. We do this as we know that reading individual bytes + # from our stream would be slower than necessary ( although possible ) + # The dbuf buffer contains commands after the first two MSB sizes, the + # offset specifies the amount of bytes read to get the sizes. + ddata = allocate_memory(dstream.size - offset) + ddata.write(dbuf) + # read the rest from the stream. The size we give is larger than necessary + stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE) + + ####################################################################### + if 'c_apply_delta' in globals(): + c_apply_delta(bbuf, ddata, tbuf) + else: + apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write) + ####################################################################### + + # finally, swap out source and target buffers. The target is now the + # base for the next delta to apply + bbuf, tbuf = tbuf, bbuf + bbuf.seek(0) + tbuf.seek(0) + final_target_size = target_size + # END for each delta to apply + + # its already seeked to 0, constrain it to the actual size + # NOTE: in the end of the loop, it swaps buffers, hence our target buffer + # is not tbuf, but bbuf ! 
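+        # Concretely (hypothetical chain): with deltas [D2, D1] and base B, the
+        # loop first applies D1 to B writing into tbuf, swaps, then applies D2 to
+        # that result - so after the final swap the finished data sits in bbuf.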
+ self._mm_target = bbuf + self._size = final_target_size + + #{ Configuration + if not has_perf_mod: + _set_cache_ = _set_cache_brute_ + else: + _set_cache_ = _set_cache_too_slow_without_c + + #} END configuration + + def read(self, count=0): + bl = self._size - self._br # bytes left + if count < 1 or count > bl: + count = bl + # NOTE: we could check for certain size limits, and possibly + # return buffers instead of strings to prevent byte copying + data = self._mm_target.read(count) + self._br += len(data) + return data + + def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)): + """Allows to reset the stream to restart reading + + :raise ValueError: If offset and whence are not 0""" + if offset != 0 or whence != getattr(os, 'SEEK_SET', 0): + raise ValueError("Can only seek to position 0") + # END handle offset + self._br = 0 + self._mm_target.seek(0) + + #{ Interface + + @classmethod + def new(cls, stream_list): + """ + Convert the given list of streams into a stream which resolves deltas + when reading from it. + + :param stream_list: two or more stream objects, first stream is a Delta + to the object that you want to resolve, followed by N additional delta + streams. The list's last stream must be a non-delta stream. + + :return: Non-Delta OPackStream object whose stream can be used to obtain + the decompressed resolved data + :raise ValueError: if the stream list cannot be handled""" + if len(stream_list) < 2: + raise ValueError("Need at least two streams") + # END single object special handling + + if stream_list[-1].type_id in delta_types: + raise ValueError( + "Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type) + # END check stream + return cls(stream_list) + + #} END interface + + #{ OInfo like Interface + + @property + def type(self): + return self._bstream.type + + @property + def type_id(self): + return self._bstream.type_id + + @property + def size(self): + """:return: number of uncompressed bytes in the stream""" + return self._size + + #} END oinfo like interface + + +#} END RO streams + + +#{ W Streams + +class Sha1Writer(object): + + """Simple stream writer which produces a sha whenever you like as it degests + everything it is supposed to write""" + __slots__ = "sha1" + + def __init__(self): + self.sha1 = make_sha() + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :param data: byte object + :return: length of incoming data""" + + self.sha1.update(data) + + return len(data) + + # END stream interface + + #{ Interface + + def sha(self, as_hex=False): + """:return: sha so far + :param as_hex: if True, sha will be hex-encoded, binary otherwise""" + if as_hex: + return self.sha1.hexdigest() + return self.sha1.digest() + + #} END interface + + +class FlexibleSha1Writer(Sha1Writer): + + """Writer producing a sha1 while passing on the written bytes to the given + write function""" + __slots__ = 'writer' + + def __init__(self, writer): + Sha1Writer.__init__(self) + self.writer = writer + + def write(self, data): + Sha1Writer.write(self, data) + self.writer(data) + + +class ZippedStoreShaWriter(Sha1Writer): + + """Remembers everything someone writes to it and generates a sha""" + __slots__ = ('buf', 'zip') + + def __init__(self): + Sha1Writer.__init__(self) + self.buf = BytesIO() + self.zip = zlib.compressobj(zlib.Z_BEST_SPEED) + + def __getattr__(self, attr): + return getattr(self.buf, attr) + + def write(self, data): + alen = Sha1Writer.write(self, data) + 
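# -----------------------------------------------------------------------------
# Editorial sketch, not part of the original diff: how DeltaApplyReader.new()
# documented above is typically driven, mirroring the pack tests further down
# (PackFile.collect_streams() followed by DeltaApplyReader.new()). The path
# and function name are hypothetical; new() raises ValueError for chains it
# cannot resolve, e.g. ref-deltas without an index.

from gitdb.pack import PackFile
from gitdb.stream import DeltaApplyReader

def resolved_object_data(pack_path, pack_offset):
    """Return the fully undeltified data of the object at pack_offset."""
    pack = PackFile(pack_path)                    # an existing .pack file
    streams = pack.collect_streams(pack_offset)   # delta chain, base stream last
    if len(streams) < 2:
        return streams[0].read()                  # not a delta at all
    return DeltaApplyReader.new(streams).read()
# -----------------------------------------------------------------------------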
self.buf.write(self.zip.compress(data)) + + return alen + + def close(self): + self.buf.write(self.zip.flush()) + + def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)): + """Seeking currently only supports to rewind written data + Multiple writes are not supported""" + if offset != 0 or whence != getattr(os, 'SEEK_SET', 0): + raise ValueError("Can only seek to position 0") + # END handle offset + self.buf.seek(0) + + def getvalue(self): + """:return: string value from the current stream position to the end""" + return self.buf.getvalue() + + +class FDCompressedSha1Writer(Sha1Writer): + + """Digests data written to it, making the sha available, then compress the + data and write it to the file descriptor + + **Note:** operates on raw file descriptors + **Note:** for this to work, you have to use the close-method of this instance""" + __slots__ = ("fd", "sha1", "zip") + + # default exception + exc = IOError("Failed to write all bytes to filedescriptor") + + def __init__(self, fd): + super(FDCompressedSha1Writer, self).__init__() + self.fd = fd + self.zip = zlib.compressobj(zlib.Z_BEST_SPEED) + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :return: length of incoming data""" + self.sha1.update(data) + cdata = self.zip.compress(data) + bytes_written = write(self.fd, cdata) + + if bytes_written != len(cdata): + raise self.exc + + return len(data) + + def close(self): + remainder = self.zip.flush() + if write(self.fd, remainder) != len(remainder): + raise self.exc + return close(self.fd) + + #} END stream interface + + +class FDStream(object): + + """A simple wrapper providing the most basic functions on a file descriptor + with the fileobject interface. Cannot use os.fdopen as the resulting stream + takes ownership""" + __slots__ = ("_fd", '_pos') + + def __init__(self, fd): + self._fd = fd + self._pos = 0 + + def write(self, data): + self._pos += len(data) + os.write(self._fd, data) + + def read(self, count=0): + if count == 0: + count = os.path.getsize(self._filepath) + # END handle read everything + + bytes = os.read(self._fd, count) + self._pos += len(bytes) + return bytes + + def fileno(self): + return self._fd + + def tell(self): + return self._pos + + def close(self): + close(self._fd) + + +class NullStream(object): + + """A stream that does nothing but providing a stream interface. 
+ Use it like /dev/null""" + __slots__ = tuple() + + def read(self, size=0): + return '' + + def close(self): + pass + + def write(self, data): + return len(data) + + +#} END W streams diff --git a/libs/gitdb/test/__init__.py b/libs/gitdb/test/__init__.py new file mode 100644 index 000000000..8a681e428 --- /dev/null +++ b/libs/gitdb/test/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php diff --git a/libs/gitdb/test/lib.py b/libs/gitdb/test/lib.py new file mode 100644 index 000000000..bbdd241cd --- /dev/null +++ b/libs/gitdb/test/lib.py @@ -0,0 +1,208 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Utilities used in ODB testing""" +from gitdb import OStream +from gitdb.utils.compat import xrange + +import sys +import random +from array import array + +from io import BytesIO + +import glob +import unittest +import tempfile +import shutil +import os +import gc +import logging +from functools import wraps + + +#{ Bases + +class TestBase(unittest.TestCase): + """Base class for all tests + + TestCase providing access to readonly repositories using the following member variables. + + * gitrepopath + + * read-only base path of the git source repository, i.e. .../git/.git + """ + + #{ Invvariants + k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE" + #} END invariants + + @classmethod + def setUpClass(cls): + try: + super(TestBase, cls).setUpClass() + except AttributeError: + pass + + cls.gitrepopath = os.environ.get(cls.k_env_git_repo) + if not cls.gitrepopath: + logging.info( + "You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", cls.k_env_git_repo) + ospd = os.path.dirname + cls.gitrepopath = os.path.join(ospd(ospd(ospd(__file__))), '.git') + # end assure gitrepo is set + assert cls.gitrepopath.endswith('.git') + + +#} END bases + +#{ Decorators + +def skip_on_travis_ci(func): + """All tests decorated with this one will raise SkipTest when run on travis ci. + Use it to workaround difficult to solve issues + NOTE: copied from bcore (https://github.com/Byron/bcore)""" + @wraps(func) + def wrapper(self, *args, **kwargs): + if 'TRAVIS' in os.environ: + import nose + raise nose.SkipTest("Cannot run on travis-ci") + # end check for travis ci + return func(self, *args, **kwargs) + # end wrapper + return wrapper + + +def with_rw_directory(func): + """Create a temporary directory which can be written to, remove it if the + test succeeds, but leave it otherwise to aid additional debugging""" + + def wrapper(self): + path = tempfile.mktemp(prefix=func.__name__) + os.mkdir(path) + keep = False + try: + try: + return func(self, path) + except Exception: + sys.stderr.write("Test %s.%s failed, output is at %r\n" % (type(self).__name__, func.__name__, path)) + keep = True + raise + finally: + # Need to collect here to be sure all handles have been closed. It appears + # a windows-only issue. In fact things should be deleted, as well as + # memory maps closed, once objects go out of scope. For some reason + # though this is not the case here unless we collect explicitly. 
+ if not keep: + gc.collect() + shutil.rmtree(path) + # END handle exception + # END wrapper + + wrapper.__name__ = func.__name__ + return wrapper + + +def with_packs_rw(func): + """Function that provides a path into which the packs for testing should be + copied. Will pass on the path to the actual function afterwards""" + + def wrapper(self, path): + src_pack_glob = fixture_path('packs/*') + copy_files_globbed(src_pack_glob, path, hard_link_ok=True) + return func(self, path) + # END wrapper + + wrapper.__name__ = func.__name__ + return wrapper + +#} END decorators + +#{ Routines + + +def fixture_path(relapath=''): + """:return: absolute path into the fixture directory + :param relapath: relative path into the fixtures directory, or '' + to obtain the fixture directory itself""" + return os.path.join(os.path.dirname(__file__), 'fixtures', relapath) + + +def copy_files_globbed(source_glob, target_dir, hard_link_ok=False): + """Copy all files found according to the given source glob into the target directory + :param hard_link_ok: if True, hard links will be created if possible. Otherwise + the files will be copied""" + for src_file in glob.glob(source_glob): + if hard_link_ok and hasattr(os, 'link'): + target = os.path.join(target_dir, os.path.basename(src_file)) + try: + os.link(src_file, target) + except OSError: + shutil.copy(src_file, target_dir) + # END handle cross device links ( and resulting failure ) + else: + shutil.copy(src_file, target_dir) + # END try hard link + # END for each file to copy + + +def make_bytes(size_in_bytes, randomize=False): + """:return: string with given size in bytes + :param randomize: try to produce a very random stream""" + actual_size = size_in_bytes // 4 + producer = xrange(actual_size) + if randomize: + producer = list(producer) + random.shuffle(producer) + # END randomize + a = array('i', producer) + return a.tostring() + + +def make_object(type, data): + """:return: bytes resembling an uncompressed object""" + odata = "blob %i\0" % len(data) + return odata.encode("ascii") + data + + +def make_memory_file(size_in_bytes, randomize=False): + """:return: tuple(size_of_stream, stream) + :param randomize: try to produce a very random stream""" + d = make_bytes(size_in_bytes, randomize) + return len(d), BytesIO(d) + +#} END routines + +#{ Stream Utilities + + +class DummyStream(object): + + def __init__(self): + self.was_read = False + self.bytes = 0 + self.closed = False + + def read(self, size): + self.was_read = True + self.bytes = size + + def close(self): + self.closed = True + + def _assert(self): + assert self.was_read + + +class DeriveTest(OStream): + + def __init__(self, sha, type, size, stream, *args, **kwargs): + self.myarg = kwargs.pop('myarg') + self.args = args + + def _assert(self): + assert self.args + assert self.myarg + +#} END stream utilitiess diff --git a/libs/gitdb/test/test_base.py b/libs/gitdb/test/test_base.py new file mode 100644 index 000000000..519cdfdc1 --- /dev/null +++ b/libs/gitdb/test/test_base.py @@ -0,0 +1,105 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test for object db""" +from gitdb.test.lib import ( + TestBase, + DummyStream, + DeriveTest, +) + +from gitdb import ( + OInfo, + OPackInfo, + ODeltaPackInfo, + OStream, + OPackStream, + ODeltaPackStream, + IStream +) +from gitdb.util import ( + NULL_BIN_SHA +) + +from gitdb.typ import ( + 
str_blob_type +) + + +class TestBaseTypes(TestBase): + + def test_streams(self): + # test info + sha = NULL_BIN_SHA + s = 20 + blob_id = 3 + + info = OInfo(sha, str_blob_type, s) + assert info.binsha == sha + assert info.type == str_blob_type + assert info.type_id == blob_id + assert info.size == s + + # test pack info + # provides type_id + pinfo = OPackInfo(0, blob_id, s) + assert pinfo.type == str_blob_type + assert pinfo.type_id == blob_id + assert pinfo.pack_offset == 0 + + dpinfo = ODeltaPackInfo(0, blob_id, s, sha) + assert dpinfo.type == str_blob_type + assert dpinfo.type_id == blob_id + assert dpinfo.delta_info == sha + assert dpinfo.pack_offset == 0 + + # test ostream + stream = DummyStream() + ostream = OStream(*(info + (stream, ))) + assert ostream.stream is stream + ostream.read(15) + stream._assert() + assert stream.bytes == 15 + ostream.read(20) + assert stream.bytes == 20 + + # test packstream + postream = OPackStream(*(pinfo + (stream, ))) + assert postream.stream is stream + postream.read(10) + stream._assert() + assert stream.bytes == 10 + + # test deltapackstream + dpostream = ODeltaPackStream(*(dpinfo + (stream, ))) + dpostream.stream is stream + dpostream.read(5) + stream._assert() + assert stream.bytes == 5 + + # derive with own args + DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert() + + # test istream + istream = IStream(str_blob_type, s, stream) + assert istream.binsha == None + istream.binsha = sha + assert istream.binsha == sha + + assert len(istream.binsha) == 20 + assert len(istream.hexsha) == 40 + + assert istream.size == s + istream.size = s * 2 + istream.size == s * 2 + assert istream.type == str_blob_type + istream.type = "something" + assert istream.type == "something" + assert istream.stream is stream + istream.stream = None + assert istream.stream is None + + assert istream.error is None + istream.error = Exception() + assert isinstance(istream.error, Exception) diff --git a/libs/gitdb/test/test_example.py b/libs/gitdb/test/test_example.py new file mode 100644 index 000000000..6e80bf5c6 --- /dev/null +++ b/libs/gitdb/test/test_example.py @@ -0,0 +1,43 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module with examples from the tutorial section of the docs""" +import os +from gitdb.test.lib import TestBase +from gitdb import IStream +from gitdb.db import LooseObjectDB + +from io import BytesIO + + +class TestExamples(TestBase): + + def test_base(self): + ldb = LooseObjectDB(os.path.join(self.gitrepopath, 'objects')) + + for sha1 in ldb.sha_iter(): + oinfo = ldb.info(sha1) + ostream = ldb.stream(sha1) + assert oinfo[:3] == ostream[:3] + + assert len(ostream.read()) == ostream.size + assert ldb.has_object(oinfo.binsha) + # END for each sha in database + # assure we close all files + try: + del(ostream) + del(oinfo) + except UnboundLocalError: + pass + # END ignore exception if there are no loose objects + + data = "my data".encode("ascii") + istream = IStream("blob", len(data), BytesIO(data)) + + # the object does not yet have a sha + assert istream.binsha is None + ldb.store(istream) + # now the sha is set + assert len(istream.binsha) == 20 + assert ldb.has_object(istream.binsha) diff --git a/libs/gitdb/test/test_pack.py b/libs/gitdb/test/test_pack.py new file mode 100644 index 000000000..24e2a3134 --- /dev/null +++ b/libs/gitdb/test/test_pack.py @@ -0,0 +1,255 @@ +# 
Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test everything about packs reading and writing""" +from gitdb.test.lib import ( + TestBase, + with_rw_directory, + fixture_path +) + +from gitdb.stream import DeltaApplyReader + +from gitdb.pack import ( + PackEntity, + PackIndexFile, + PackFile +) + +from gitdb.base import ( + OInfo, + OStream, +) + +from gitdb.fun import delta_types +from gitdb.exc import UnsupportedOperation +from gitdb.util import to_bin_sha +from gitdb.utils.compat import xrange + +try: + from itertools import izip +except ImportError: + izip = zip + +from nose import SkipTest + +import os +import tempfile + + +#{ Utilities +def bin_sha_from_filename(filename): + return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:]) +#} END utilities + + +class TestPack(TestBase): + + packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67) + packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30) + packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42) + packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2]) + packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2]) + packfile_v2_3_ascii = ( + fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2]) + + def _assert_index_file(self, index, version, size): + assert index.packfile_checksum() != index.indexfile_checksum() + assert len(index.packfile_checksum()) == 20 + assert len(index.indexfile_checksum()) == 20 + assert index.version() == version + assert index.size() == size + assert len(index.offsets()) == size + + # get all data of all objects + for oidx in xrange(index.size()): + sha = index.sha(oidx) + assert oidx == index.sha_to_index(sha) + + entry = index.entry(oidx) + assert len(entry) == 3 + + assert entry[0] == index.offset(oidx) + assert entry[1] == sha + assert entry[2] == index.crc(oidx) + + # verify partial sha + for l in (4, 8, 11, 17, 20): + assert index.partial_sha_to_index(sha[:l], l * 2) == oidx + + # END for each object index in indexfile + self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2) + + def _assert_pack_file(self, pack, version, size): + assert pack.version() == 2 + assert pack.size() == size + assert len(pack.checksum()) == 20 + + num_obj = 0 + for obj in pack.stream_iter(): + num_obj += 1 + info = pack.info(obj.pack_offset) + stream = pack.stream(obj.pack_offset) + + assert info.pack_offset == stream.pack_offset + assert info.type_id == stream.type_id + assert hasattr(stream, 'read') + + # it should be possible to read from both streams + assert obj.read() == stream.read() + + streams = pack.collect_streams(obj.pack_offset) + assert streams + + # read the stream + try: + dstream = DeltaApplyReader.new(streams) + except ValueError: + # ignore these, old git versions use only ref deltas, + # which we havent resolved ( as we are without an index ) + # Also ignore non-delta streams + continue + # END get deltastream + + # read all + data = dstream.read() + assert len(data) == dstream.size + + # test seek + dstream.seek(0) + assert dstream.read() == data + + # read chunks + # NOTE: the current implementation is 
safe, it basically transfers + # all calls to the underlying memory map + + # END for each object + assert num_obj == size + + def test_pack_index(self): + # check version 1 and 2 + for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2): + index = PackIndexFile(indexfile) + self._assert_index_file(index, version, size) + # END run tests + + def test_pack(self): + # there is this special version 3, but apparently its like 2 ... + for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2): + pack = PackFile(packfile) + self._assert_pack_file(pack, version, size) + # END for each pack to test + + @with_rw_directory + def test_pack_entity(self, rw_dir): + pack_objs = list() + for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1), + (self.packfile_v2_2, self.packindexfile_v2), + (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)): + packfile, version, size = packinfo + indexfile, version, size = indexinfo + entity = PackEntity(packfile) + assert entity.pack().path() == packfile + assert entity.index().path() == indexfile + pack_objs.extend(entity.stream_iter()) + + count = 0 + for info, stream in izip(entity.info_iter(), entity.stream_iter()): + count += 1 + assert info.binsha == stream.binsha + assert len(info.binsha) == 20 + assert info.type_id == stream.type_id + assert info.size == stream.size + + # we return fully resolved items, which is implied by the sha centric access + assert not info.type_id in delta_types + + # try all calls + assert len(entity.collect_streams(info.binsha)) + oinfo = entity.info(info.binsha) + assert isinstance(oinfo, OInfo) + assert oinfo.binsha is not None + ostream = entity.stream(info.binsha) + assert isinstance(ostream, OStream) + assert ostream.binsha is not None + + # verify the stream + try: + assert entity.is_valid_stream(info.binsha, use_crc=True) + except UnsupportedOperation: + pass + # END ignore version issues + assert entity.is_valid_stream(info.binsha, use_crc=False) + # END for each info, stream tuple + assert count == size + + # END for each entity + + # pack writing - write all packs into one + # index path can be None + pack_path1 = tempfile.mktemp('', "pack1", rw_dir) + pack_path2 = tempfile.mktemp('', "pack2", rw_dir) + index_path = tempfile.mktemp('', 'index', rw_dir) + iteration = 0 + + def rewind_streams(): + for obj in pack_objs: + obj.stream.seek(0) + # END utility + for ppath, ipath, num_obj in zip((pack_path1, pack_path2), + (index_path, None), + (len(pack_objs), None)): + iwrite = None + if ipath: + ifile = open(ipath, 'wb') + iwrite = ifile.write + # END handle ip + + # make sure we rewind the streams ... 
we work on the same objects over and over again + if iteration > 0: + rewind_streams() + # END rewind streams + iteration += 1 + + with open(ppath, 'wb') as pfile: + pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj) + assert os.path.getsize(ppath) > 100 + + # verify pack + pf = PackFile(ppath) + assert pf.size() == len(pack_objs) + assert pf.version() == PackFile.pack_version_default + assert pf.checksum() == pack_sha + pf.close() + + # verify index + if ipath is not None: + ifile.close() + assert os.path.getsize(ipath) > 100 + idx = PackIndexFile(ipath) + assert idx.version() == PackIndexFile.index_version_default + assert idx.packfile_checksum() == pack_sha + assert idx.indexfile_checksum() == index_sha + assert idx.size() == len(pack_objs) + idx.close() + # END verify files exist + # END for each packpath, indexpath pair + + # verify the packs thoroughly + rewind_streams() + entity = PackEntity.create(pack_objs, rw_dir) + count = 0 + for info in entity.info_iter(): + count += 1 + for use_crc in range(2): + assert entity.is_valid_stream(info.binsha, use_crc) + # END for each crc mode + # END for each info + assert count == len(pack_objs) + entity.close() + + def test_pack_64(self): + # TODO: hex-edit a pack helping us to verify that we can handle 64 byte offsets + # of course without really needing such a huge pack + raise SkipTest() diff --git a/libs/gitdb/test/test_stream.py b/libs/gitdb/test/test_stream.py new file mode 100644 index 000000000..96268252d --- /dev/null +++ b/libs/gitdb/test/test_stream.py @@ -0,0 +1,164 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test for object db""" + +from gitdb.test.lib import ( + TestBase, + DummyStream, + make_bytes, + make_object, + fixture_path +) + +from gitdb import ( + DecompressMemMapReader, + FDCompressedSha1Writer, + LooseObjectDB, + Sha1Writer, + MemoryDB, + IStream, +) +from gitdb.util import hex_to_bin + +import zlib +from gitdb.typ import ( + str_blob_type +) + +import tempfile +import os +from io import BytesIO + + +class TestStream(TestBase): + + """Test stream classes""" + + data_sizes = (15, 10000, 1000 * 1024 + 512) + + def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None): + """Make stream tests - the orig_stream is seekable, allowing it to be + rewound and reused + :param cdata: the data we expect to read from stream, the contents + :param rewind_stream: function called to rewind the stream to make it ready + for reuse""" + ns = 10 + assert len(cdata) > ns - 1, "Data must be larger than %i, was %i" % (ns, len(cdata)) + + # read in small steps + ss = len(cdata) // ns + for i in range(ns): + data = stream.read(ss) + chunk = cdata[i * ss:(i + 1) * ss] + assert data == chunk + # END for each step + rest = stream.read() + if rest: + assert rest == cdata[-len(rest):] + # END handle rest + + if isinstance(stream, DecompressMemMapReader): + assert len(stream.data()) == stream.compressed_bytes_read() + # END handle special type + + rewind_stream(stream) + + # read everything + rdata = stream.read() + assert rdata == cdata + + if isinstance(stream, DecompressMemMapReader): + assert len(stream.data()) == stream.compressed_bytes_read() + # END handle special type + + def test_decompress_reader(self): + for close_on_deletion in range(2): + for with_size in range(2): + for ds in self.data_sizes: + 
cdata = make_bytes(ds, randomize=False) + + # zdata = zipped actual data + # cdata = original content data + + # create reader + if with_size: + # need object data + zdata = zlib.compress(make_object(str_blob_type, cdata)) + typ, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion) + assert size == len(cdata) + assert typ == str_blob_type + + # even if we don't set the size, it will be set automatically on first read + test_reader = DecompressMemMapReader(zdata, close_on_deletion=False) + assert test_reader._s == len(cdata) + else: + # here we need content data + zdata = zlib.compress(cdata) + reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata)) + assert reader._s == len(cdata) + # END get reader + + self._assert_stream_reader(reader, cdata, lambda r: r.seek(0)) + + # put in a dummy stream for closing + dummy = DummyStream() + reader._m = dummy + + assert not dummy.closed + del(reader) + assert dummy.closed == close_on_deletion + # END for each datasize + # END whether size should be used + # END whether stream should be closed when deleted + + def test_sha_writer(self): + writer = Sha1Writer() + assert 2 == writer.write("hi".encode("ascii")) + assert len(writer.sha(as_hex=1)) == 40 + assert len(writer.sha(as_hex=0)) == 20 + + # make sure it does something ;) + prev_sha = writer.sha() + writer.write("hi again".encode("ascii")) + assert writer.sha() != prev_sha + + def test_compressed_writer(self): + for ds in self.data_sizes: + fd, path = tempfile.mkstemp() + ostream = FDCompressedSha1Writer(fd) + data = make_bytes(ds, randomize=False) + + # for now, just a single write, code doesn't care about chunking + assert len(data) == ostream.write(data) + ostream.close() + + # its closed already + self.failUnlessRaises(OSError, os.close, fd) + + # read everything back, compare to data we zip + fd = os.open(path, os.O_RDONLY | getattr(os, 'O_BINARY', 0)) + written_data = os.read(fd, os.path.getsize(path)) + assert len(written_data) == os.path.getsize(path) + os.close(fd) + assert written_data == zlib.compress(data, 1) # best speed + + os.remove(path) + # END for each os + + def test_decompress_reader_special_case(self): + odb = LooseObjectDB(fixture_path('objects')) + mdb = MemoryDB() + for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911', + b'7bb839852ed5e3a069966281bb08d50012fb309b',): + ostream = odb.stream(hex_to_bin(sha)) + + # if there is a bug, we will be missing one byte exactly ! 
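# -----------------------------------------------------------------------------
# Editorial sketch, not part of the original diff: the hexshas asserted below
# are ordinary git object ids - SHA-1 over a "<type> <size>\0" header followed
# by the raw data (compare make_object() in gitdb.test.lib). A minimal
# stand-alone illustration, assuming nothing beyond hashlib:

import hashlib

def loose_object_hexsha(type_string, data):
    header = ("%s %i\0" % (type_string, len(data))).encode("ascii")
    return hashlib.sha1(header + data).hexdigest()

assert len(loose_object_hexsha("blob", b"my data")) == 40
# -----------------------------------------------------------------------------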
+ data = ostream.read() + assert len(data) == ostream.size + + # Putting it back in should yield nothing new - after all, we have + dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data))) + assert dump.hexsha == sha + # end for each loose object sha to test diff --git a/libs/gitdb/test/test_util.py b/libs/gitdb/test/test_util.py new file mode 100644 index 000000000..847bdab5e --- /dev/null +++ b/libs/gitdb/test/test_util.py @@ -0,0 +1,100 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test for object db""" +import tempfile +import os + +from gitdb.test.lib import TestBase +from gitdb.util import ( + to_hex_sha, + to_bin_sha, + NULL_HEX_SHA, + LockedFD +) + + +class TestUtils(TestBase): + + def test_basics(self): + assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA + assert len(to_bin_sha(NULL_HEX_SHA)) == 20 + assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA.encode("ascii") + + def _cmp_contents(self, file_path, data): + # raise if data from file at file_path + # does not match data string + with open(file_path, "rb") as fp: + assert fp.read() == data.encode("ascii") + + def test_lockedfd(self): + my_file = tempfile.mktemp() + orig_data = "hello" + new_data = "world" + with open(my_file, "wb") as my_file_fp: + my_file_fp.write(orig_data.encode("ascii")) + + try: + lfd = LockedFD(my_file) + lockfilepath = lfd._lockfilepath() + + # cannot end before it was started + self.failUnlessRaises(AssertionError, lfd.rollback) + self.failUnlessRaises(AssertionError, lfd.commit) + + # open for writing + assert not os.path.isfile(lockfilepath) + wfd = lfd.open(write=True) + assert lfd._fd is wfd + assert os.path.isfile(lockfilepath) + + # write data and fail + os.write(wfd, new_data.encode("ascii")) + lfd.rollback() + assert lfd._fd is None + self._cmp_contents(my_file, orig_data) + assert not os.path.isfile(lockfilepath) + + # additional call doesn't fail + lfd.commit() + lfd.rollback() + + # test reading + lfd = LockedFD(my_file) + rfd = lfd.open(write=False) + assert os.read(rfd, len(orig_data)) == orig_data.encode("ascii") + + assert os.path.isfile(lockfilepath) + # deletion rolls back + del(lfd) + assert not os.path.isfile(lockfilepath) + + # write data - concurrently + lfd = LockedFD(my_file) + olfd = LockedFD(my_file) + assert not os.path.isfile(lockfilepath) + wfdstream = lfd.open(write=True, stream=True) # this time as stream + assert os.path.isfile(lockfilepath) + # another one fails + self.failUnlessRaises(IOError, olfd.open) + + wfdstream.write(new_data.encode("ascii")) + lfd.commit() + assert not os.path.isfile(lockfilepath) + self._cmp_contents(my_file, new_data) + + # could test automatic _end_writing on destruction + finally: + os.remove(my_file) + # END final cleanup + + # try non-existing file for reading + lfd = LockedFD(tempfile.mktemp()) + try: + lfd.open(write=False) + except OSError: + assert not os.path.exists(lfd._lockfilepath()) + else: + self.fail("expected OSError") + # END handle exceptions diff --git a/libs/gitdb/typ.py b/libs/gitdb/typ.py new file mode 100644 index 000000000..98d15f3ec --- /dev/null +++ b/libs/gitdb/typ.py @@ -0,0 +1,10 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module containing information 
about types known to the database""" + +str_blob_type = b'blob' +str_commit_type = b'commit' +str_tree_type = b'tree' +str_tag_type = b'tag' diff --git a/libs/gitdb/util.py b/libs/gitdb/util.py new file mode 100644 index 000000000..8a1819b6d --- /dev/null +++ b/libs/gitdb/util.py @@ -0,0 +1,401 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel ([email protected]) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +import binascii +import os +import mmap +import sys +import time +import errno + +from io import BytesIO + +from smmap import ( + StaticWindowMapManager, + SlidingWindowMapManager, + SlidingWindowMapBuffer +) + +# initialize our global memory manager instance +# Use it to free cached (and unused) resources. +if sys.version_info < (2, 6): + mman = StaticWindowMapManager() +else: + mman = SlidingWindowMapManager() +# END handle mman + +import hashlib + +try: + from struct import unpack_from +except ImportError: + from struct import unpack, calcsize + __calcsize_cache = dict() + + def unpack_from(fmt, data, offset=0): + try: + size = __calcsize_cache[fmt] + except KeyError: + size = calcsize(fmt) + __calcsize_cache[fmt] = size + # END exception handling + return unpack(fmt, data[offset: offset + size]) + # END own unpack_from implementation + + +#{ Aliases + +hex_to_bin = binascii.a2b_hex +bin_to_hex = binascii.b2a_hex + +# errors +ENOENT = errno.ENOENT + +# os shortcuts +exists = os.path.exists +mkdir = os.mkdir +chmod = os.chmod +isdir = os.path.isdir +isfile = os.path.isfile +rename = os.rename +dirname = os.path.dirname +basename = os.path.basename +join = os.path.join +read = os.read +write = os.write +close = os.close +fsync = os.fsync + + +def _retry(func, *args, **kwargs): + # Wrapper around functions, that are problematic on "Windows". Sometimes + # the OS or someone else has still a handle to the file + if sys.platform == "win32": + for _ in range(10): + try: + return func(*args, **kwargs) + except Exception: + time.sleep(0.1) + return func(*args, **kwargs) + else: + return func(*args, **kwargs) + + +def remove(*args, **kwargs): + return _retry(os.remove, *args, **kwargs) + + +# Backwards compatibility imports +from gitdb.const import ( + NULL_BIN_SHA, + NULL_HEX_SHA +) + +#} END Aliases + +#{ compatibility stuff ... + + +class _RandomAccessBytesIO(object): + + """Wrapper to provide required functionality in case memory maps cannot or may + not be used. This is only really required in python 2.4""" + __slots__ = '_sio' + + def __init__(self, buf=''): + self._sio = BytesIO(buf) + + def __getattr__(self, attr): + return getattr(self._sio, attr) + + def __len__(self): + return len(self.getvalue()) + + def __getitem__(self, i): + return self.getvalue()[i] + + def __getslice__(self, start, end): + return self.getvalue()[start:end] + + +def byte_ord(b): + """ + Return the integer representation of the byte string. This supports Python + 3 byte arrays as well as standard strings. + """ + try: + return ord(b) + except TypeError: + return b + +#} END compatibility stuff ... 
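# -----------------------------------------------------------------------------
# Editorial note, not part of the original diff: the hex_to_bin / bin_to_hex
# aliases above are plain binascii conversions between the 40-character hex
# form and the 20-byte binary form of an object id, e.g.:

from binascii import a2b_hex, b2a_hex   # the functions the aliases point at

assert b2a_hex(a2b_hex(b"0" * 40)) == b"0" * 40   # hex -> bin -> hex round trip
assert len(a2b_hex(b"0" * 40)) == 20              # binary shas are 20 bytes
# -----------------------------------------------------------------------------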
+ +#{ Routines + + +def make_sha(source=''.encode("ascii")): + """A python2.4 workaround for the sha/hashlib module fiasco + + **Note** From the dulwich project """ + try: + return hashlib.sha1(source) + except NameError: + import sha + sha1 = sha.sha(source) + return sha1 + + +def allocate_memory(size): + """:return: a file-protocol accessible memory block of the given size""" + if size == 0: + return _RandomAccessBytesIO(b'') + # END handle empty chunks gracefully + + try: + return mmap.mmap(-1, size) # read-write by default + except EnvironmentError: + # setup real memory instead + # this of course may fail if the amount of memory is not available in + # one chunk - would only be the case in python 2.4, being more likely on + # 32 bit systems. + return _RandomAccessBytesIO(b"\0" * size) + # END handle memory allocation + + +def file_contents_ro(fd, stream=False, allow_mmap=True): + """:return: read-only contents of the file represented by the file descriptor fd + + :param fd: file descriptor opened for reading + :param stream: if False, random access is provided, otherwise the stream interface + is provided. + :param allow_mmap: if True, its allowed to map the contents into memory, which + allows large files to be handled and accessed efficiently. The file-descriptor + will change its position if this is False""" + try: + if allow_mmap: + # supports stream and random access + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + except EnvironmentError: + # python 2.4 issue, 0 wants to be the actual size + return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ) + # END handle python 2.4 + except OSError: + pass + # END exception handling + + # read manully + contents = os.read(fd, os.fstat(fd).st_size) + if stream: + return _RandomAccessBytesIO(contents) + return contents + + +def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0): + """Get the file contents at filepath as fast as possible + + :return: random access compatible memory of the given filepath + :param stream: see ``file_contents_ro`` + :param allow_mmap: see ``file_contents_ro`` + :param flags: additional flags to pass to os.open + :raise OSError: If the file could not be opened + + **Note** for now we don't try to use O_NOATIME directly as the right value needs to be + shared per database in fact. It only makes a real difference for loose object + databases anyway, and they use it with the help of the ``flags`` parameter""" + fd = os.open(filepath, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags) + try: + return file_contents_ro(fd, stream, allow_mmap) + finally: + close(fd) + # END assure file is closed + + +def sliding_ro_buffer(filepath, flags=0): + """ + :return: a buffer compatible object which uses our mapped memory manager internally + ready to read the whole given filepath""" + return SlidingWindowMapBuffer(mman.make_cursor(filepath), flags=flags) + + +def to_hex_sha(sha): + """:return: hexified version of sha""" + if len(sha) == 40: + return sha + return bin_to_hex(sha) + + +def to_bin_sha(sha): + if len(sha) == 20: + return sha + return hex_to_bin(sha) + + +#} END routines + + +#{ Utilities + +class LazyMixin(object): + + """ + Base class providing an interface to lazily retrieve attribute values upon + first access. If slots are used, memory will only be reserved once the attribute + is actually accessed and retrieved the first time. All future accesses will + return the cached value as stored in the Instance's dict or slot. 
+ """ + + __slots__ = tuple() + + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is reqeusted, it is simply + returned from our dict/slots. """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) + + def _set_cache_(self, attr): + """ + This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + + +class LockedFD(object): + + """ + This class facilitates a safe read and write operation to a file on disk. + If we write to 'file', we obtain a lock file at 'file.lock' and write to + that instead. If we succeed, the lock file will be renamed to overwrite + the original file. + + When reading, we obtain a lock file, but to prevent other writers from + succeeding while we are reading the file. + + This type handles error correctly in that it will assure a consistent state + on destruction. + + **note** with this setup, parallel reading is not possible""" + __slots__ = ("_filepath", '_fd', '_write') + + def __init__(self, filepath): + """Initialize an instance with the givne filepath""" + self._filepath = filepath + self._fd = None + self._write = None # if True, we write a file + + def __del__(self): + # will do nothing if the file descriptor is already closed + if self._fd is not None: + self.rollback() + + def _lockfilepath(self): + return "%s.lock" % self._filepath + + def open(self, write=False, stream=False): + """ + Open the file descriptor for reading or writing, both in binary mode. + + :param write: if True, the file descriptor will be opened for writing. Other + wise it will be opened read-only. + :param stream: if True, the file descriptor will be wrapped into a simple stream + object which supports only reading or writing + :return: fd to read from or write to. 
It is still maintained by this instance + and must not be closed directly + :raise IOError: if the lock could not be retrieved + :raise OSError: If the actual file could not be opened for reading + + **note** must only be called once""" + if self._write is not None: + raise AssertionError("Called %s multiple times" % self.open) + + self._write = write + + # try to open the lock file + binary = getattr(os, 'O_BINARY', 0) + lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary + try: + fd = os.open(self._lockfilepath(), lockmode, int("600", 8)) + if not write: + os.close(fd) + else: + self._fd = fd + # END handle file descriptor + except OSError: + raise IOError("Lock at %r could not be obtained" % self._lockfilepath()) + # END handle lock retrieval + + # open actual file if required + if self._fd is None: + # we could specify exlusive here, as we obtained the lock anyway + try: + self._fd = os.open(self._filepath, os.O_RDONLY | binary) + except: + # assure we release our lockfile + remove(self._lockfilepath()) + raise + # END handle lockfile + # END open descriptor for reading + + if stream: + # need delayed import + from gitdb.stream import FDStream + return FDStream(self._fd) + else: + return self._fd + # END handle stream + + def commit(self): + """When done writing, call this function to commit your changes into the + actual file. + The file descriptor will be closed, and the lockfile handled. + + **Note** can be called multiple times""" + self._end_writing(successful=True) + + def rollback(self): + """Abort your operation without any changes. The file descriptor will be + closed, and the lock released. + + **Note** can be called multiple times""" + self._end_writing(successful=False) + + def _end_writing(self, successful=True): + """Handle the lock according to the write mode """ + if self._write is None: + raise AssertionError("Cannot end operation if it wasn't started yet") + + if self._fd is None: + return + + os.close(self._fd) + self._fd = None + + lockfile = self._lockfilepath() + if self._write and successful: + # on windows, rename does not silently overwrite the existing one + if sys.platform == "win32": + if isfile(self._filepath): + remove(self._filepath) + # END remove if exists + # END win32 special handling + os.rename(lockfile, self._filepath) + + # assure others can at least read the file - the tmpfile left it at rw-- + # We may also write that file, on windows that boils down to a remove- + # protection as well + chmod(self._filepath, int("644", 8)) + else: + # just delete the file so far, we failed + remove(lockfile) + # END successful handling + +#} END utilities diff --git a/libs/gitdb/utils/__init__.py b/libs/gitdb/utils/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/libs/gitdb/utils/__init__.py diff --git a/libs/gitdb/utils/compat.py b/libs/gitdb/utils/compat.py new file mode 100644 index 000000000..a7899cb14 --- /dev/null +++ b/libs/gitdb/utils/compat.py @@ -0,0 +1,43 @@ +import sys + +PY3 = sys.version_info[0] == 3 + +try: + from itertools import izip + xrange = xrange +except ImportError: + # py3 + izip = zip + xrange = range +# end handle python version + +try: + # Python 2 + buffer = buffer + memoryview = buffer + # Assume no memory view ... 
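# -----------------------------------------------------------------------------
# Editorial sketch, not part of the original diff: how the LazyMixin class
# defined in gitdb/util.py above is meant to be used - _set_cache_(attr) runs
# on first access of an unknown attribute, stores the value, and ordinary
# attribute lookup serves it from then on. The ExpensiveInfo class and its
# attribute are hypothetical.

from gitdb.util import LazyMixin

class ExpensiveInfo(LazyMixin):

    def _set_cache_(self, attr):
        if attr == 'answer':
            self.answer = 41 + 1   # pretend this is costly to compute
        else:
            super(ExpensiveInfo, self)._set_cache_(attr)

info = ExpensiveInfo()
assert info.answer == 42           # computed and cached on first access
assert 'answer' in info.__dict__   # later accesses bypass __getattr__
# -----------------------------------------------------------------------------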
+ def to_bytes(i): + return i +except NameError: + # Python 3 has no `buffer`; only `memoryview` + # However, it's faster to just slice the object directly, maybe it keeps a view internally + def buffer(obj, offset, size=None): + if size is None: + # return memoryview(obj)[offset:] + return obj[offset:] + else: + # return memoryview(obj)[offset:offset+size] + return obj[offset:offset + size] + # end buffer reimplementation + # smmap can return memory view objects, which can't be compared as buffers/bytes can ... + def to_bytes(i): + if isinstance(i, memoryview): + return i.tobytes() + return i + + memoryview = memoryview + +try: + MAXSIZE = sys.maxint +except AttributeError: + MAXSIZE = sys.maxsize diff --git a/libs/gitdb/utils/encoding.py b/libs/gitdb/utils/encoding.py new file mode 100644 index 000000000..4d270af9d --- /dev/null +++ b/libs/gitdb/utils/encoding.py @@ -0,0 +1,31 @@ +from gitdb.utils import compat + +if compat.PY3: + string_types = (str, ) + text_type = str +else: + string_types = (basestring, ) + text_type = unicode + + +def force_bytes(data, encoding="ascii"): + if isinstance(data, bytes): + return data + + if isinstance(data, string_types): + return data.encode(encoding) + + return data + + +def force_text(data, encoding="utf-8"): + if isinstance(data, text_type): + return data + + if isinstance(data, bytes): + return data.decode(encoding) + + if compat.PY3: + return text_type(data, encoding) + else: + return text_type(data) |
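A short illustration of the two helpers above (an editorial addition, not part of the commit): force_bytes() encodes text and passes bytes through, force_text() does the reverse.

from gitdb.utils.encoding import force_bytes, force_text

assert force_bytes("abc") == b"abc"                       # text -> bytes (ascii)
assert force_bytes(b"abc") == b"abc"                      # bytes pass through
assert force_text(b"s\xc3\xa9", "utf-8") == u"s\u00e9"    # bytes -> text
assert force_text(u"plain text") == u"plain text"         # text passes through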