summaryrefslogtreecommitdiffhomepage
path: root/libs/json_tricks
diff options
context:
space:
mode:
authorpanni <[email protected]>2018-10-31 17:08:29 +0100
committerpanni <[email protected]>2018-10-31 17:08:29 +0100
commit8f584143f8afc46a75a83dab5243739772e3562b (patch)
treec7dae21e993880af8bee71ad7b5a63f2977db577 /libs/json_tricks
parent4beaeaa99e84bbe1ed87d0466a55a22ba25c8437 (diff)
downloadbazarr-8f584143f8afc46a75a83dab5243739772e3562b.tar.gz
bazarr-8f584143f8afc46a75a83dab5243739772e3562b.zip
update deps
Diffstat (limited to 'libs/json_tricks')
-rw-r--r--libs/json_tricks/__init__.py24
-rw-r--r--libs/json_tricks/comment.py29
-rw-r--r--libs/json_tricks/decoders.py248
-rw-r--r--libs/json_tricks/encoders.py311
-rw-r--r--libs/json_tricks/nonp.py207
-rw-r--r--libs/json_tricks/np.py28
-rw-r--r--libs/json_tricks/np_utils.py15
-rw-r--r--libs/json_tricks/utils.py81
8 files changed, 943 insertions, 0 deletions
diff --git a/libs/json_tricks/__init__.py b/libs/json_tricks/__init__.py
new file mode 100644
index 000000000..8c890c6de
--- /dev/null
+++ b/libs/json_tricks/__init__.py
@@ -0,0 +1,24 @@
+
+from .utils import hashodict, NoNumpyException, NoPandasException, get_scalar_repr, encode_scalars_inplace
+from .comment import strip_comment_line_with_symbol, strip_comments
+from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, json_complex_encode, \
+ numeric_types_encode, ClassInstanceEncoder, json_set_encode, pandas_encode, nopandas_encode, \
+ numpy_encode, NumpyEncoder, nonumpy_encode, NoNumpyEncoder
+from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, json_complex_hook, \
+ numeric_types_hook, ClassInstanceHook, json_set_hook, pandas_hook, nopandas_hook, json_numpy_obj_hook, \
+ json_nonumpy_obj_hook
+from .nonp import dumps, dump, loads, load
+
+
# Detect numpy availability once at import time; probing via find_module is
# just as slow as importing, so simply attempt the import.
NUMPY_MODE = True
try:
	import numpy
except ImportError:
	NUMPY_MODE = False
# from .np import dumps, dump, loads, load, numpy_encode, NumpyEncoder, json_numpy_obj_hook
# from .np_utils import encode_scalars_inplace
+
+
diff --git a/libs/json_tricks/comment.py b/libs/json_tricks/comment.py
new file mode 100644
index 000000000..8b3c06909
--- /dev/null
+++ b/libs/json_tricks/comment.py
@@ -0,0 +1,29 @@
+
+from re import findall
+
+
def strip_comment_line_with_symbol(line, start):
	"""
	Strip the trailing comment that starts at symbol `start` from `line`,
	unless every occurrence of the symbol lies inside a double-quoted string.
	"""
	segments = line.split(start)
	# Count unescaped double quotes per segment; an even running total means the
	# current occurrence of the comment symbol sits outside any string literal.
	quote_re = r'(?:^|[^"\\]|(?:\\\\|\\")+)(")'
	open_quotes = 0
	for idx, segment in enumerate(segments):
		open_quotes += len(findall(quote_re, segment))
		if open_quotes % 2 == 0:
			return start.join(segments[:idx + 1]).rstrip()
	# Every candidate symbol was inside a string: keep the whole line.
	return line.rstrip()
+
+
def strip_comments(string, comment_symbols=frozenset(('#', '//'))):
	"""
	:param string: A string containing json with comments started by comment_symbols.
	:param comment_symbols: Iterable of symbols that start a line comment (default # or //).
	:return: The string with the comments removed.
	"""
	stripped = []
	for line in string.splitlines():
		# Apply every comment symbol in turn; string-literal awareness is
		# handled by strip_comment_line_with_symbol.
		for symbol in comment_symbols:
			line = strip_comment_line_with_symbol(line, start=symbol)
		stripped.append(line)
	return '\n'.join(stripped)
+
+
diff --git a/libs/json_tricks/decoders.py b/libs/json_tricks/decoders.py
new file mode 100644
index 000000000..221a8f93e
--- /dev/null
+++ b/libs/json_tricks/decoders.py
@@ -0,0 +1,248 @@
+
+from datetime import datetime, date, time, timedelta
+from fractions import Fraction
+from importlib import import_module
+from collections import OrderedDict
+from decimal import Decimal
+from logging import warning
+from json_tricks import NoPandasException, NoNumpyException
+
+
class DuplicateJsonKeyException(Exception):
	""" Raised when loading a json map that contains duplicate keys while `allow_duplicates` is False. """
+
+
class TricksPairHook(object):
	"""
	Hook that converts json maps to the appropriate python type (dict or OrderedDict)
	and then runs any number of hooks on the individual maps.
	"""
	def __init__(self, ordered=True, obj_pairs_hooks=None, allow_duplicates=True):
		"""
		:param ordered: True if maps should retain their ordering.
		:param obj_pairs_hooks: An iterable of hooks to apply to elements.
		:param allow_duplicates: If False, raise on json maps with repeated keys.
		"""
		self.map_type = OrderedDict if ordered else dict
		self.obj_pairs_hooks = list(obj_pairs_hooks) if obj_pairs_hooks else []
		self.allow_duplicates = allow_duplicates

	def __call__(self, pairs):
		if not self.allow_duplicates:
			self._reject_duplicates(pairs)
		result = self.map_type(pairs)
		# Each hook receives the output of the previous one.
		for hook in self.obj_pairs_hooks:
			result = hook(result)
		return result

	def _reject_duplicates(self, pairs):
		# Raise if any key appears more than once among the decoded pairs.
		seen = set()
		for key, _ in pairs:
			if key in seen:
				raise DuplicateJsonKeyException(('Trying to load a json map which contains a' +
					' duplicate key "{0:}" (but allow_duplicates is False)').format(key))
			seen.add(key)
+
+
def json_date_time_hook(dct):
	"""
	Return an encoded date, time, datetime or timedelta to it's python representation, including optional timezone.

	:param dct: (dict) json encoded date, time, datetime or timedelta
	:return: (date/time/datetime/timedelta obj) python representation of the above
	"""
	if not isinstance(dct, dict):
		return dct

	def tz_from(mapping):
		# Timezones are stored by name; restoring them requires pytz.
		if 'tzinfo' not in mapping:
			return None
		try:
			import pytz
		except ImportError as err:
			raise ImportError(('Tried to load a json object which has a timezone-aware (date)time. '
				'However, `pytz` could not be imported, so the object could not be loaded. '
				'Error: {0:}').format(str(err)))
		return pytz.timezone(mapping['tzinfo'])

	def field(key):
		# Zero-valued fields are dropped by the encoder, so default them to 0.
		return dct.get(key, 0)

	if '__date__' in dct:
		return date(year=field('year'), month=field('month'), day=field('day'))
	if '__time__' in dct:
		return time(hour=field('hour'), minute=field('minute'), second=field('second'),
			microsecond=field('microsecond'), tzinfo=tz_from(dct))
	if '__datetime__' in dct:
		return datetime(year=field('year'), month=field('month'), day=field('day'),
			hour=field('hour'), minute=field('minute'), second=field('second'),
			microsecond=field('microsecond'), tzinfo=tz_from(dct))
	if '__timedelta__' in dct:
		return timedelta(days=field('days'), seconds=field('seconds'),
			microseconds=field('microseconds'))
	return dct
+
+
def json_complex_hook(dct):
	"""
	Return an encoded complex number to it's python representation.

	:param dct: (dict) json encoded complex number (__complex__)
	:return: python complex number
	"""
	if not isinstance(dct, dict) or '__complex__' not in dct:
		return dct
	parts = dct['__complex__']
	assert len(parts) == 2
	return complex(parts[0], parts[1])
+
+
def numeric_types_hook(dct):
	"""Decode `__decimal__` / `__fraction__` maps back to `Decimal` / `Fraction` objects."""
	if not isinstance(dct, dict):
		return dct
	if '__decimal__' in dct:
		return Decimal(dct['__decimal__'])
	if '__fraction__' in dct:
		return Fraction(numerator=dct['numerator'], denominator=dct['denominator'])
	return dct
+
+
class ClassInstanceHook(object):
	"""
	This hook tries to convert json encoded by class_instance_encoder back to it's original instance.
	It only works if the environment is the same, e.g. the class is similarly importable and hasn't changed.
	"""
	def __init__(self, cls_lookup_map=None):
		"""
		:param cls_lookup_map: Optional dict mapping class names to classes, used for classes
			defined in `__main__` or whose module cannot be imported/resolved.
		"""
		self.cls_lookup_map = cls_lookup_map or {}

	def __call__(self, dct):
		if not (isinstance(dct, dict) and '__instance_type__' in dct):
			return dct
		mod, name = dct['__instance_type__']
		attrs = dct['attributes']
		if mod is None:
			# The class came from the main script, so there is no import path;
			# fall back to cls_lookup_map.
			try:
				Cls = getattr(__import__('__main__'), name)
			except (ImportError, AttributeError):
				if name not in self.cls_lookup_map:
					raise ImportError(('class {0:s} seems to have been exported from the main file, which means '
						'it has no module/import path set; you need to provide cls_lookup_map which maps names '
						'to classes').format(name))
				Cls = self.cls_lookup_map[name]
		else:
			imp_err = None
			Cls = None
			try:
				# Bug fix: was `import_module('{0:}'.format(mod, name))` with a stray format argument.
				module = import_module(mod)
			except ImportError as err:
				imp_err = ('encountered import error "{0:}" while importing "{1:}" to decode a json file; perhaps '
					'it was encoded in a different environment where {1:}.{2:} was available').format(err, mod, name)
			else:
				if hasattr(module, name):
					Cls = getattr(module, name)
				else:
					# Bug fix: `getattr(module, name)` used to run unconditionally and raised
					# AttributeError here, so the cls_lookup_map fallback below was unreachable.
					imp_err = 'imported "{0:}" but could not find "{1:}" inside while decoding a json file (found {2:})'.format(
						module, name, ', '.join(attr for attr in dir(module) if not attr.startswith('_')))
			if imp_err:
				# Bug fix: previously tested the literal string 'name' instead of the variable.
				if name in self.cls_lookup_map:
					Cls = self.cls_lookup_map[name]
				else:
					raise ImportError(imp_err)
		try:
			obj = Cls.__new__(Cls)
		except TypeError:
			raise TypeError(('problem while decoding instance of "{0:s}"; this instance has a special '
				'__new__ method and can\'t be restored').format(name))
		if hasattr(obj, '__json_decode__'):
			# The class provides its own decoding protocol.
			obj.__json_decode__(**attrs)
		else:
			obj.__dict__ = dict(attrs)
		return obj
+
+
def json_set_hook(dct):
	"""
	Return an encoded set to it's python representation.
	"""
	if not isinstance(dct, dict) or '__set__' not in dct:
		return dct
	# Lists are unhashable, so nested lists become tuples inside the set.
	return {tuple(item) if isinstance(item, list) else item for item in dct['__set__']}
+
+
def pandas_hook(dct):
	"""
	Decode maps produced by `pandas_encode` back to pandas DataFrames / Series.

	:param dct: (dict) possibly containing a '__pandas_dataframe__' or '__pandas_series__' marker
	:return: DataFrame or Series if `dct` was an encoded one, otherwise `dct` unchanged
	"""
	# Consistency fix: all other hooks guard on dict input; do the same here so
	# non-mapping values pass through instead of failing the `in` test.
	if not isinstance(dct, dict):
		return dct
	if '__pandas_dataframe__' not in dct and '__pandas_series__' not in dct:
		return dct
	# todo: this is experimental
	if not getattr(pandas_hook, '_warned', False):
		pandas_hook._warned = True
		warning('Pandas loading support in json-tricks is experimental and may change in future versions.')
	if '__pandas_dataframe__' in dct:
		try:
			from pandas import DataFrame
		except ImportError:
			raise NoPandasException('Trying to decode a map which appears to represent a pandas data structure, but pandas appears not to be installed.')
		from numpy import dtype, array
		meta = dct.pop('__pandas_dataframe__')
		indx = dct.pop('index') if 'index' in dct else None
		dtypes = dict((colname, dtype(tp)) for colname, tp in zip(meta['column_order'], meta['types']))
		data = OrderedDict()
		for name, col in dct.items():
			data[name] = array(col, dtype=dtypes[name])
		return DataFrame(
			data=data,
			index=indx,
			columns=meta['column_order'],
			# mixed `dtypes` argument not supported, so use dict of numpy arrays
		)
	else:
		# Bug fix: the Series branch used to import pandas without the ImportError
		# guard that the DataFrame branch has, so a missing pandas raised a bare
		# ImportError instead of the intended NoPandasException.
		try:
			from pandas import Series
		except ImportError:
			raise NoPandasException('Trying to decode a map which appears to represent a pandas data structure, but pandas appears not to be installed.')
		from numpy import dtype
		meta = dct.pop('__pandas_series__')
		indx = dct.pop('index') if 'index' in dct else None
		return Series(
			data=dct['data'],
			index=indx,
			name=meta['name'],
			dtype=dtype(meta['type']),
		)
+
+
def nopandas_hook(dct):
	"""Raise a clear error if an encoded pandas structure is met while pandas support is disabled."""
	if not isinstance(dct, dict):
		return dct
	if '__pandas_dataframe__' in dct or '__pandas_series__' in dct:
		raise NoPandasException(('Trying to decode a map which appears to represent a pandas '
			'data structure, but pandas support is not enabled, perhaps it is not installed.'))
	return dct
+
+
def json_numpy_obj_hook(dct):
	"""
	Replace any numpy arrays previously encoded by NumpyEncoder to their proper
	shape, data type and data.

	:param dct: (dict) json encoded ndarray
	:return: (ndarray) if input was an encoded ndarray
	"""
	if not (isinstance(dct, dict) and '__ndarray__' in dct):
		return dct
	try:
		import numpy
	except ImportError:
		raise NoNumpyException('Trying to decode a map which appears to represent a numpy '
			'array, but numpy appears not to be installed.')
	# 'Corder' is only stored for multi-dimensional arrays; default to 'A' (any order).
	if 'Corder' in dct:
		order = 'C' if dct['Corder'] else 'F'
	else:
		order = 'A'
	if dct['shape']:
		return numpy.asarray(dct['__ndarray__'], dtype=dct['dtype'], order=order)
	# Zero-dimensional input: restore as a numpy scalar of the stored dtype.
	return getattr(numpy, dct['dtype'])(dct['__ndarray__'])
+
+
def json_nonumpy_obj_hook(dct):
	"""
	This hook has no effect except to check if you're trying to decode numpy arrays without support, and give you a useful message.
	"""
	if not isinstance(dct, dict):
		return dct
	if '__ndarray__' in dct:
		raise NoNumpyException(('Trying to decode a map which appears to represent a numpy array, '
			'but numpy support is not enabled, perhaps it is not installed.'))
	return dct
+
+
diff --git a/libs/json_tricks/encoders.py b/libs/json_tricks/encoders.py
new file mode 100644
index 000000000..386e690e5
--- /dev/null
+++ b/libs/json_tricks/encoders.py
@@ -0,0 +1,311 @@
+
+from datetime import datetime, date, time, timedelta
+from fractions import Fraction
+from logging import warning
+from json import JSONEncoder
+from sys import version
+from decimal import Decimal
+from .utils import hashodict, call_with_optional_kwargs, NoPandasException, NoNumpyException
+
+
class TricksEncoder(JSONEncoder):
	"""
	Encoder that runs any number of encoder functions or instances on
	the objects that are being encoded.

	Each encoder should make any appropriate changes and return an object,
	changed or not. This will be passed to the other encoders.
	"""
	def __init__(self, obj_encoders=None, silence_typeerror=False, primitives=False, **json_kwargs):
		"""
		:param obj_encoders: An iterable of functions or encoder instances to try.
		:param silence_typeerror: If set to True, ignore the TypeErrors that Encoder instances throw (default False).
		:param primitives: If True, passed to each encoder so it may emit primitive json types instead of marker maps.

		Remaining keyword arguments are forwarded to `json.JSONEncoder`.
		"""
		self.obj_encoders = []
		if obj_encoders:
			self.obj_encoders = list(obj_encoders)
		self.silence_typeerror = silence_typeerror
		self.primitives = primitives
		super(TricksEncoder, self).__init__(**json_kwargs)

	def default(self, obj, *args, **kwargs):
		"""
		This is the method of JSONEncoders that is called for each object; it calls
		all the encoders with the previous one's output used as input.

		It works for Encoder instances, but they are expected not to throw
		`TypeError` for unrecognized types (the super method does that by default).

		It never calls the `super` method so if there are non-primitive types
		left at the end, you'll get an encoding error.
		"""
		# Track identity to detect whether any encoder actually handled the object.
		prev_id = id(obj)
		for encoder in self.obj_encoders:
			if hasattr(encoder, 'default'):
				#todo: write test for this scenario (maybe ClassInstanceEncoder?)
				try:
					obj = call_with_optional_kwargs(encoder.default, obj, primitives=self.primitives)
				except TypeError as err:
					# Encoder instances raise TypeError for unrecognized types; optionally swallow it.
					if not self.silence_typeerror:
						raise
			elif hasattr(encoder, '__call__'):
				obj = call_with_optional_kwargs(encoder, obj, primitives=self.primitives)
			else:
				raise TypeError('`obj_encoder` {0:} does not have `default` method and is not callable'.format(encoder))
		if id(obj) == prev_id:
			#todo: test
			raise TypeError('Object of type {0:} could not be encoded by {1:} using encoders [{2:s}]'.format(
				type(obj), self.__class__.__name__, ', '.join(str(encoder) for encoder in self.obj_encoders)))
		return obj
+
+
def json_date_time_encode(obj, primitives=False):
	"""
	Encode a date, time, datetime or timedelta to a string of a json dictionary, including optional timezone.

	:param obj: date/time/datetime/timedelta obj
	:param primitives: If True, encode as an ISO string (or total seconds for timedelta).
	:return: (dict) json primitives representation of date, time, datetime or timedelta
	"""
	if primitives and isinstance(obj, (date, time, datetime)):
		return obj.isoformat()
	if isinstance(obj, datetime):
		# datetime must be checked before date, since it is a date subclass.
		dct = hashodict([('__datetime__', None), ('year', obj.year), ('month', obj.month),
			('day', obj.day), ('hour', obj.hour), ('minute', obj.minute),
			('second', obj.second), ('microsecond', obj.microsecond)])
		if obj.tzinfo:
			# NOTE(review): `.zone` exists on pytz timezones only — assumes pytz; confirm for other tzinfo types.
			dct['tzinfo'] = obj.tzinfo.zone
	elif isinstance(obj, date):
		dct = hashodict([('__date__', None), ('year', obj.year), ('month', obj.month), ('day', obj.day)])
	elif isinstance(obj, time):
		dct = hashodict([('__time__', None), ('hour', obj.hour), ('minute', obj.minute),
			('second', obj.second), ('microsecond', obj.microsecond)])
		if obj.tzinfo:
			dct['tzinfo'] = obj.tzinfo.zone
	elif isinstance(obj, timedelta):
		if primitives:
			return obj.total_seconds()
		dct = hashodict([('__timedelta__', None), ('days', obj.days), ('seconds', obj.seconds),
			('microseconds', obj.microseconds)])
	else:
		return obj
	# Drop zero-valued fields; the decoder restores them as defaults.
	# The '__*__' marker keys are always kept.
	for key in [k for k, v in dct.items() if not k.startswith('__') and not v]:
		del dct[key]
	return dct
+
+
def class_instance_encode(obj, primitives=False):
	"""
	Encodes a class instance to json. Note that it can only be recovered if the environment allows the class to be
	imported in the same way.

	:param obj: the instance to encode; lists and dicts pass through untouched
	:param primitives: if True, encode only the attribute map, without class/module meta data
	"""
	if isinstance(obj, (list, dict)):
		return obj
	if not (hasattr(obj, '__class__') and hasattr(obj, '__dict__')):
		return obj
	if not hasattr(obj, '__new__'):
		# Only possible on python 2 old-style classes; they cannot be re-instantiated on decode.
		raise TypeError('class "{0:s}" does not have a __new__ method; '.format(obj.__class__) +
			('perhaps it is an old-style class not derived from `object`; add `object` as a base class to encode it.'
				if (version[:2] == '2.') else 'this should not happen in Python3'))
	# Verify up-front that the decoder will be able to re-create an instance.
	try:
		obj.__new__(obj.__class__)
	except TypeError:
		raise TypeError(('instance "{0:}" of class "{1:}" cannot be encoded because it\'s __new__ method '
			'cannot be called, perhaps it requires extra parameters').format(obj, obj.__class__))
	mod = obj.__class__.__module__
	if mod == '__main__':
		# No usable import path; the decoder will need cls_lookup_map.
		mod = None
		warning(('class {0:} seems to have been defined in the main file; unfortunately this means'
			' that it\'s module/import path is unknown, so you might have to provide cls_lookup_map when '
			'decoding').format(obj.__class__))
	name = obj.__class__.__name__
	if hasattr(obj, '__json_encode__'):
		# The class provides its own encoding protocol.
		attrs = obj.__json_encode__()
	else:
		attrs = hashodict(obj.__dict__.items())
	if primitives:
		return attrs
	return hashodict((('__instance_type__', (mod, name)), ('attributes', attrs)))
+
+
def json_complex_encode(obj, primitives=False):
	"""
	Encode a complex number as a json dictionary of it's real and imaginary part.

	:param obj: complex number, e.g. `2+1j`
	:param primitives: If True, encode as a plain [real, imag] list (not restorable by the hook).
	:return: (dict) json primitives representation of `obj`
	"""
	if not isinstance(obj, complex):
		return obj
	parts = [obj.real, obj.imag]
	return parts if primitives else hashodict(__complex__=parts)
+
+
def numeric_types_encode(obj, primitives=False):
	"""
	Encode Decimal and Fraction.

	:param primitives: Encode decimals and fractions as standard floats. You may lose precision. If you do this, you may need to enable `allow_nan` (decimals always allow NaNs but floats do not).
	"""
	if isinstance(obj, Decimal):
		return float(obj) if primitives else {'__decimal__': str(obj.canonical())}
	if isinstance(obj, Fraction):
		if primitives:
			return float(obj)
		return hashodict((
			('__fraction__', True),
			('numerator', obj.numerator),
			('denominator', obj.denominator),
		))
	return obj
+
+
class ClassInstanceEncoder(JSONEncoder):
	"""
	JSON encoder that also encodes class instances; see `class_instance_encode`.
	"""
	# Not covered in tests since `class_instance_encode` is recommended way.
	def __init__(self, obj=None, encode_cls_instances=True, **kwargs):
		"""
		:param obj: Ignored; kept (now optional) for backward compatibility with old call sites.
		:param encode_cls_instances: If True, convert class instances via `class_instance_encode`.
		"""
		self.encode_cls_instances = encode_cls_instances
		# Bug fix: `obj` used to be forwarded positionally to JSONEncoder.__init__,
		# whose parameters are keyword-only in python 3, so every instantiation
		# raised TypeError. It is now accepted but not forwarded.
		super(ClassInstanceEncoder, self).__init__(**kwargs)

	def default(self, obj, *args, **kwargs):
		# Convert class instances first, then let JSONEncoder handle (or reject) the rest.
		if self.encode_cls_instances:
			obj = class_instance_encode(obj)
		return super(ClassInstanceEncoder, self).default(obj, *args, **kwargs)
+
+
def json_set_encode(obj, primitives=False):
	"""
	Encode python sets as dictionary with key __set__ and a list of the values.

	Try to sort the set to get a consistent json representation, use arbitrary order if the data is not ordinal.
	"""
	if not isinstance(obj, set):
		return obj
	try:
		members = sorted(obj)
	except Exception:
		# Mixed/unorderable members: fall back to arbitrary iteration order.
		members = list(obj)
	return members if primitives else hashodict(__set__=members)
+
+
def pandas_encode(obj, primitives=False):
	"""
	Encode a pandas DataFrame or Series as a json-able map of columns, with type/order
	meta data unless `primitives` is set. Non-pandas objects pass through unchanged.
	"""
	from pandas import DataFrame, Series
	if isinstance(obj, (DataFrame, Series)):
		#todo: this is experimental
		if not getattr(pandas_encode, '_warned', False):
			pandas_encode._warned = True
			warning('Pandas dumping support in json-tricks is experimental and may change in future versions.')
	if isinstance(obj, DataFrame):
		dct = hashodict()
		if not primitives:
			dct['__pandas_dataframe__'] = hashodict((
				('column_order', tuple(obj.columns.values)),
				('types', tuple(str(dt) for dt in obj.dtypes)),
			))
		dct['index'] = tuple(obj.index.values)
		for k, name in enumerate(obj.columns.values):
			# Bug fix: `.ix` indexing is deprecated (and removed in modern pandas);
			# `.iloc` is the positional drop-in replacement.
			dct[name] = tuple(obj.iloc[:, k].values)
		return dct
	if isinstance(obj, Series):
		dct = hashodict()
		if not primitives:
			dct['__pandas_series__'] = hashodict((
				('name', str(obj.name)),
				('type', str(obj.dtype)),
			))
		dct['index'] = tuple(obj.index.values)
		dct['data'] = tuple(obj.values)
		return dct
	return obj
+
+
def nopandas_encode(obj):
	"""
	Raise NoPandasException for pandas data structures when pandas support is not enabled.
	"""
	if ('DataFrame' in getattr(obj.__class__, '__name__', '') or 'Series' in getattr(obj.__class__, '__name__', '')) \
			and 'pandas.' in getattr(obj.__class__, '__module__', ''):
		# Bug fix: the message used to talk about numpy arrays/support,
		# copy-pasted from the numpy check; this is the pandas check.
		raise NoPandasException(('Trying to encode an object of type {0:} which appears to be '
			'a pandas data structure, but pandas support is not enabled, perhaps it is not installed.').format(type(obj)))
	return obj
+
+
def numpy_encode(obj, primitives=False):
	"""
	Encodes numpy `ndarray`s as lists with meta data.

	Encodes numpy scalar types as Python equivalents. Special encoding is not possible,
	because int64 (in py2) and float64 (in py2 and py3) are subclasses of primitives,
	which never reach the encoder.

	:param primitives: If True, arrays are serialized as (nested) lists without meta info.
	"""
	from numpy import ndarray, generic
	if isinstance(obj, ndarray):
		if primitives:
			return obj.tolist()
		dct = hashodict((
			('__ndarray__', obj.tolist()),
			('dtype', str(obj.dtype)),
			('shape', obj.shape),
		))
		# Memory order only matters (and is only stored) for multi-dimensional arrays.
		if len(obj.shape) > 1:
			dct['Corder'] = obj.flags['C_CONTIGUOUS']
		return dct
	if isinstance(obj, generic):
		if NumpyEncoder.SHOW_SCALAR_WARNING:
			NumpyEncoder.SHOW_SCALAR_WARNING = False
			warning('json-tricks: numpy scalar serialization is experimental and may work differently in future versions')
		return obj.item()
	return obj
+
+
class NumpyEncoder(ClassInstanceEncoder):
	"""
	JSON encoder for numpy arrays.

	Deprecated: pass `numpy_encode` to `TricksEncoder` instead.
	"""
	SHOW_SCALAR_WARNING = True  # show a warning that numpy scalar serialization is experimental

	def default(self, obj, *args, **kwargs):
		"""
		If input object is a ndarray it will be converted into a dict holding
		data type, shape and the data. The object can be restored using json_numpy_obj_hook.
		"""
		warning('`NumpyEncoder` is deprecated, use `numpy_encode`') #todo
		obj = numpy_encode(obj)
		return super(NumpyEncoder, self).default(obj, *args, **kwargs)
+
+
def nonumpy_encode(obj):
	"""
	Raises an error for numpy arrays.
	"""
	if 'ndarray' in getattr(obj.__class__, '__name__', '') and 'numpy.' in getattr(obj.__class__, '__module__', ''):
		# Bug fix: the message used to talk about pandas data structures
		# (with a typo), copy-pasted from the pandas check; this is the numpy check.
		raise NoNumpyException(('Trying to encode an object of type {0:} which appears to be '
			'a numpy array, but numpy support is not enabled, perhaps it is not installed.').format(type(obj)))
	return obj
+
+
class NoNumpyEncoder(JSONEncoder):
	"""
	Deprecated JSON encoder that raises a clear error for numpy objects; see `nonumpy_encode`.
	"""
	def default(self, obj, *args, **kwargs):
		# Deprecated: prefer passing `nonumpy_encode` to `TricksEncoder`.
		warning('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`') #todo
		converted = nonumpy_encode(obj)
		return super(NoNumpyEncoder, self).default(converted, *args, **kwargs)
+
+
diff --git a/libs/json_tricks/nonp.py b/libs/json_tricks/nonp.py
new file mode 100644
index 000000000..6522687d3
--- /dev/null
+++ b/libs/json_tricks/nonp.py
@@ -0,0 +1,207 @@
+
+from gzip import GzipFile
+from io import BytesIO
+from json import loads as json_loads
+from os import fsync
+from sys import exc_info, version
+from .utils import NoNumpyException # keep 'unused' imports
+from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports
+from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, ClassInstanceEncoder, \
+ json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, nonumpy_encode, NoNumpyEncoder, \
+ nopandas_encode, pandas_encode # keep 'unused' imports
+from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, ClassInstanceHook, \
+ json_complex_hook, json_set_hook, numeric_types_hook, json_numpy_obj_hook, json_nonumpy_obj_hook, \
+ nopandas_hook, pandas_hook # keep 'unused' imports
+from json import JSONEncoder
+
+
is_py3 = (version[:2] == '3.')
# On python 2, text may be either `str` (bytes) or `unicode`; accept both there.
str_type = str if is_py3 else (basestring, unicode,)
ENCODING = 'UTF-8'


# Shared hook instance so that `loads` can inject `cls_lookup_map` into it per call.
_cih_instance = ClassInstanceHook()
DEFAULT_ENCODERS = [json_date_time_encode, class_instance_encode, json_complex_encode, json_set_encode, numeric_types_encode,]
DEFAULT_HOOKS = [json_date_time_hook, _cih_instance, json_complex_hook, json_set_hook, numeric_types_hook,]

# Prepend numpy support (or its error-raising fallback) depending on availability.
try:
	import numpy
except ImportError:
	DEFAULT_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS
	DEFAULT_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS
else:
	# numpy encode needs to be before complex
	DEFAULT_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS
	DEFAULT_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS

# Likewise for pandas.
try:
	import pandas
except ImportError:
	DEFAULT_ENCODERS = [nopandas_encode,] + DEFAULT_ENCODERS
	DEFAULT_HOOKS = [nopandas_hook,] + DEFAULT_HOOKS
else:
	DEFAULT_ENCODERS = [pandas_encode,] + DEFAULT_ENCODERS
	DEFAULT_HOOKS = [pandas_hook,] + DEFAULT_HOOKS


DEFAULT_NONP_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS  # DEPRECATED
DEFAULT_NONP_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS  # DEPRECATED
+
+
def dumps(obj, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(),
		primitives=False, compression=None, allow_nan=False, conv_str_byte=False, **jsonkwargs):
	"""
	Convert a nested data structure to a json string.

	:param obj: The Python object to convert.
	:param sort_keys: Keep this False if you want order to be preserved.
	:param cls: The json encoder class to use, defaults to TricksEncoder.
	:param obj_encoders: Iterable of encoders to use to convert arbitrary objects into json-able primitives.
	:param extra_obj_encoders: Like `obj_encoders` but on top of them: use this to add encoders without replacing defaults. Since v3.5 these happen before default encoders.
	:param primitives: If True, encode to primitive json types instead of marker maps (lossy).
	:param compression: Gzip compression level (True means level 5), or None/False for no compression; when enabled, the result is gzipped bytes.
	:param allow_nan: Allow NaN and Infinity values, which is a (useful) violation of the JSON standard (default False).
	:param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False).
		NOTE(review): accepted here but not used inside `dumps` itself; it is honoured by `dump`/`load(s)` — confirm intent.
	:return: The string containing the json-encoded version of obj.

	Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order.
	"""
	if not hasattr(extra_obj_encoders, '__iter__'):
		raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`')
	# Extra encoders run before the defaults.
	encoders = tuple(extra_obj_encoders) + tuple(obj_encoders)
	txt = cls(sort_keys=sort_keys, obj_encoders=encoders, allow_nan=allow_nan,
		primitives=primitives, **jsonkwargs).encode(obj)
	# On python 2, normalize the encoded text to unicode.
	if not is_py3 and isinstance(txt, str):
		txt = unicode(txt, ENCODING)
	if not compression:
		return txt
	if compression is True:
		compression = 5
	# Gzip-compress the utf-8 encoded text; the caller receives bytes.
	txt = txt.encode(ENCODING)
	sh = BytesIO()
	with GzipFile(mode='wb', fileobj=sh, compresslevel=compression) as zh:
		zh.write(txt)
	gzstring = sh.getvalue()
	return gzstring
+
+
def dump(obj, fp, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(),
		primitives=False, compression=None, force_flush=False, allow_nan=False, conv_str_byte=False, **jsonkwargs):
	"""
	Convert a nested data structure to a json string and write it to a file or path.

	:param fp: File handle or path to write to.
	:param compression: The gzip compression level, or None for no compression.
	:param force_flush: If True, flush the file handle used, when possibly also in the operating system (default False).

	The other arguments are identical to `dumps`.
	"""
	txt = dumps(obj, sort_keys=sort_keys, cls=cls, obj_encoders=obj_encoders, extra_obj_encoders=extra_obj_encoders,
		primitives=primitives, compression=compression, allow_nan=allow_nan, conv_str_byte=conv_str_byte, **jsonkwargs)
	# A string argument is treated as a path; open it in binary mode ourselves.
	if isinstance(fp, str_type):
		fh = open(fp, 'wb+')
	else:
		fh = fp
	if conv_str_byte:
		# Probe with an empty write to find out whether the handle accepts bytes or text,
		# and convert `txt` accordingly.
		try:
			fh.write(b'')
		except TypeError:
			pass
			# if not isinstance(txt, str_type):
			# 	# Cannot write bytes, so must be in text mode, but we didn't get a text
			# 	if not compression:
			# 		txt = txt.decode(ENCODING)
		else:
			try:
				fh.write(u'')
			except TypeError:
				# Binary-only handle but text payload: encode it so the write succeeds.
				if isinstance(txt, str_type):
					txt = txt.encode(ENCODING)
	try:
		# Compressed output is bytes, so the file must be in binary mode.
		if 'b' not in getattr(fh, 'mode', 'b?') and not isinstance(txt, str_type) and compression:
			raise IOError('If compression is enabled, the file must be opened in binary mode.')
		try:
			fh.write(txt)
		except TypeError as err:
			# Augment the error with the most likely cause before re-raising.
			err.args = (err.args[0] + '. A possible reason is that the file is not opened in binary mode; '
				'be sure to set file mode to something like "wb".',)
			raise
	finally:
		if force_flush:
			fh.flush()
			try:
				# fsync pushes the data to the OS; not every handle has a real fileno.
				if fh.fileno() is not None:
					fsync(fh.fileno())
			except (ValueError,):
				pass
	# Only close handles we opened ourselves.
	if isinstance(fp, str_type):
		fh.close()
	return txt
+
+
def loads(string, preserve_order=True, ignore_comments=True, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS,
		extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, **jsonkwargs):
	"""
	Convert a json string to a nested python data structure.

	:param string: The string containing a json encoded data structure.
	:param preserve_order: Whether to preserve order by using OrderedDicts or not.
	:param ignore_comments: Remove comments (starting with # or //).
	:param decompression: True to use gzip decompression, False to use raw data, None to automatically determine (default). Assumes utf-8 encoding!
	:param obj_pairs_hooks: A list of dictionary hooks to apply.
	:param extra_obj_pairs_hooks: Like `obj_pairs_hooks` but on top of them: use this to add hooks without replacing defaults. Since v3.5 these happen before default hooks.
	:param cls_lookup_map: If set to a dict, for example ``globals()``, then classes encoded from __main__ are looked up this dict.
	:param allow_duplicates: If set to False, an error will be raised when loading a json-map that contains duplicate keys.
	:param parse_float: A function to parse float strings into a number type (e.g. Decimal). There is also `parse_int`.
	:param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False).
	:return: The python data structure decoded from the json string.

	Other arguments are passed on to json_func.
	"""
	if not hasattr(extra_obj_pairs_hooks, '__iter__'):
		raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `json_tricks.load(s)`')
	if decompression is None:
		# Auto-detect gzip by its two magic bytes (only matches when `string` is bytes).
		decompression = string[:2] == b'\x1f\x8b'
	if decompression:
		with GzipFile(fileobj=BytesIO(string), mode='rb') as zh:
			string = zh.read()
		string = string.decode(ENCODING)
	if not isinstance(string, str_type):
		if conv_str_byte:
			string = string.decode(ENCODING)
		else:
			raise TypeError(('Cannot automatically encode object of type "{0:}" in `json_tricks.load(s)` since '
				'the encoding is not known. You should instead encode the bytes to a string and pass that '
				'string to `load(s)`, for example bytevar.encode("utf-8") if utf-8 is the encoding.').format(type(string)))
	if ignore_comments:
		string = strip_comments(string)
	obj_pairs_hooks = tuple(obj_pairs_hooks)
	# The shared ClassInstanceHook instance is (re)configured for this call.
	_cih_instance.cls_lookup_map = cls_lookup_map or {}
	# Extra hooks run before the defaults.
	hooks = tuple(extra_obj_pairs_hooks) + obj_pairs_hooks
	hook = TricksPairHook(ordered=preserve_order, obj_pairs_hooks=hooks, allow_duplicates=allow_duplicates)
	return json_loads(string, object_pairs_hook=hook, **jsonkwargs)
+
+
def load(fp, preserve_order=True, ignore_comments=True, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS,
		extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, **jsonkwargs):
	"""
	Read a json string from a file (handle or path) and convert it to a nested python data structure.

	:param fp: File handle or path to load from.

	The other arguments are identical to loads.
	"""
	try:
		# A string argument is treated as a path; read in binary so gzip detection works.
		if isinstance(fp, str_type):
			with open(fp, 'rb') as fh:
				string = fh.read()
		else:
			string = fp.read()
	except UnicodeDecodeError as err:
		# todo: not covered in tests, is it relevant?
		# NOTE(review): `.with_traceback` is python-3-only; on python 2 this line itself would fail — confirm.
		raise Exception('There was a problem decoding the file content. A possible reason is that the file is not ' +
			'opened in binary mode; be sure to set file mode to something like "rb".').with_traceback(exc_info()[2])
	return loads(string, preserve_order=preserve_order, ignore_comments=ignore_comments, decompression=decompression,
		obj_pairs_hooks=obj_pairs_hooks, extra_obj_pairs_hooks=extra_obj_pairs_hooks, cls_lookup_map=cls_lookup_map,
		allow_duplicates=allow_duplicates, conv_str_byte=conv_str_byte, **jsonkwargs)
+
+
diff --git a/libs/json_tricks/np.py b/libs/json_tricks/np.py
new file mode 100644
index 000000000..676041f9f
--- /dev/null
+++ b/libs/json_tricks/np.py
@@ -0,0 +1,28 @@
+
+"""
+This file exists for backward compatibility reasons.
+"""
+
+from logging import warning
+from .nonp import NoNumpyException, DEFAULT_ENCODERS, DEFAULT_HOOKS, dumps, dump, loads, load # keep 'unused' imports
+from .utils import hashodict, NoPandasException
+from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports
+from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, ClassInstanceEncoder, \
+ numpy_encode, NumpyEncoder # keep 'unused' imports
+from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, ClassInstanceHook, \
+ json_complex_hook, json_set_hook, json_numpy_obj_hook # keep 'unused' imports
+
try:
	import numpy
except ImportError:
	raise NoNumpyException('Could not load numpy, maybe it is not installed? If you do not want to use numpy encoding '
		'or decoding, you can import the functions from json_tricks.nonp instead, which do not need numpy.')


# todo: warning('`json_tricks.np` is deprecated, you can import directly from `json_tricks`')


# Deprecated module-level defaults, kept only so that code importing them
# from `json_tricks.np` keeps working.
DEFAULT_NP_ENCODERS = [numpy_encode] + DEFAULT_ENCODERS  # DEPRECATED
DEFAULT_NP_HOOKS = [json_numpy_obj_hook] + DEFAULT_HOOKS  # DEPRECATED
+
+
diff --git a/libs/json_tricks/np_utils.py b/libs/json_tricks/np_utils.py
new file mode 100644
index 000000000..f2e9936d9
--- /dev/null
+++ b/libs/json_tricks/np_utils.py
@@ -0,0 +1,15 @@
+
+"""
+This file exists for backward compatibility reasons.
+"""
+
+from .utils import hashodict, get_scalar_repr, encode_scalars_inplace
+from .nonp import NoNumpyException
+from . import np
+
+# try:
+# from numpy import generic, complex64, complex128
+# except ImportError:
+# raise NoNumpyException('Could not load numpy, maybe it is not installed?')
+
+
diff --git a/libs/json_tricks/utils.py b/libs/json_tricks/utils.py
new file mode 100644
index 000000000..ace85d913
--- /dev/null
+++ b/libs/json_tricks/utils.py
@@ -0,0 +1,81 @@
+
+from collections import OrderedDict
+
+
class hashodict(OrderedDict):
	"""
	An OrderedDict that supports hashing, so instances can serve as dict keys
	or set members during encoding. Treat instances as immutable once created:
	mutation is not prevented, only forbidden by convention, and mutating a
	hashed instance leads to subtle bugs.
	"""
	def __hash__(self):
		item_view = frozenset(self.items())
		return hash(item_view)
+
+
try:
	from inspect import signature
except ImportError:
	try:
		from inspect import getfullargspec
	except ImportError:
		# Python 2 fallback: only positional/keyword parameters exist.
		from inspect import getargspec
		def get_arg_names(callable):
			spec = getargspec(callable)
			return set(spec.args)
	else:
		# Python 3 without `signature`; include keyword-only parameters too.
		#todo: this is not covered in test case (py 3+ uses `signature`, py2 `getfullargspec`); consider removing it
		def get_arg_names(callable):
			spec = getfullargspec(callable)
			return set(spec.args) | set(spec.kwonlyargs)
else:
	# Preferred path: collect parameter names from the callable's signature.
	def get_arg_names(callable):
		params = signature(callable).parameters
		return set(params)
+
+
def call_with_optional_kwargs(callable, *args, **optional_kwargs):
	"""
	Call `callable` with `args`, forwarding only those entries of
	`optional_kwargs` that its signature actually accepts.
	"""
	accepted = get_arg_names(callable)
	selected = {key: val for key, val in optional_kwargs.items() if key in accepted}
	return callable(*args, **selected)
+
+
class NoNumpyException(Exception):
	""" Raised when numpy features are requested but numpy cannot be found. """
+
+
class NoPandasException(Exception):
	""" Raised when pandas features are requested but pandas cannot be found. """
+
+
def get_scalar_repr(npscalar):
	"""
	Build the json-tricks dict form of a single numpy scalar: its plain Python
	value plus dtype name, with an empty shape marking it as zero-dimensional.
	"""
	return hashodict([
		('__ndarray__', npscalar.item()),
		('dtype', str(npscalar.dtype)),
		('shape', ()),
	])
+
+
def encode_scalars_inplace(obj):
	"""
	Walk a structure of dicts, lists, tuples and sets, replacing any numpy
	scalar by its json-tricks dictionary representation. Dicts and lists are
	modified in-place (pass a copy if that is undesirable); tuples and sets
	are rebuilt since they cannot be mutated.
	"""
	from numpy import generic, complex64, complex128
	if isinstance(obj, (generic, complex64, complex128)):
		return get_scalar_repr(obj)
	if isinstance(obj, dict):
		# Snapshot the keys since values are reassigned during iteration.
		for key in tuple(obj.keys()):
			obj[key] = encode_scalars_inplace(obj[key])
		return obj
	if isinstance(obj, list):
		# Slice-assign to keep the same list object (in-place contract).
		obj[:] = [encode_scalars_inplace(item) for item in obj]
		return obj
	if isinstance(obj, (tuple, set)):
		return type(obj)(encode_scalars_inplace(item) for item in obj)
	return obj
+
+