diff options
author | panni <[email protected]> | 2018-10-31 17:08:29 +0100 |
---|---|---|
committer | panni <[email protected]> | 2018-10-31 17:08:29 +0100 |
commit | 8f584143f8afc46a75a83dab5243739772e3562b (patch) | |
tree | c7dae21e993880af8bee71ad7b5a63f2977db577 /libs/json_tricks | |
parent | 4beaeaa99e84bbe1ed87d0466a55a22ba25c8437 (diff) | |
download | bazarr-8f584143f8afc46a75a83dab5243739772e3562b.tar.gz bazarr-8f584143f8afc46a75a83dab5243739772e3562b.zip |
update deps
Diffstat (limited to 'libs/json_tricks')
-rw-r--r-- | libs/json_tricks/__init__.py | 24 | ||||
-rw-r--r-- | libs/json_tricks/comment.py | 29 | ||||
-rw-r--r-- | libs/json_tricks/decoders.py | 248 | ||||
-rw-r--r-- | libs/json_tricks/encoders.py | 311 | ||||
-rw-r--r-- | libs/json_tricks/nonp.py | 207 | ||||
-rw-r--r-- | libs/json_tricks/np.py | 28 | ||||
-rw-r--r-- | libs/json_tricks/np_utils.py | 15 | ||||
-rw-r--r-- | libs/json_tricks/utils.py | 81 |
8 files changed, 943 insertions, 0 deletions
diff --git a/libs/json_tricks/__init__.py b/libs/json_tricks/__init__.py new file mode 100644 index 000000000..8c890c6de --- /dev/null +++ b/libs/json_tricks/__init__.py @@ -0,0 +1,24 @@ + +from .utils import hashodict, NoNumpyException, NoPandasException, get_scalar_repr, encode_scalars_inplace +from .comment import strip_comment_line_with_symbol, strip_comments +from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, json_complex_encode, \ + numeric_types_encode, ClassInstanceEncoder, json_set_encode, pandas_encode, nopandas_encode, \ + numpy_encode, NumpyEncoder, nonumpy_encode, NoNumpyEncoder +from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, json_complex_hook, \ + numeric_types_hook, ClassInstanceHook, json_set_hook, pandas_hook, nopandas_hook, json_numpy_obj_hook, \ + json_nonumpy_obj_hook +from .nonp import dumps, dump, loads, load + + +try: + # find_module takes just as long as importing, so no optimization possible + import numpy +except ImportError: + NUMPY_MODE = False + # from .nonp import dumps, dump, loads, load, nonumpy_encode as numpy_encode, json_nonumpy_obj_hook as json_numpy_obj_hook +else: + NUMPY_MODE = True + # from .np import dumps, dump, loads, load, numpy_encode, NumpyEncoder, json_numpy_obj_hook + # from .np_utils import encode_scalars_inplace + + diff --git a/libs/json_tricks/comment.py b/libs/json_tricks/comment.py new file mode 100644 index 000000000..8b3c06909 --- /dev/null +++ b/libs/json_tricks/comment.py @@ -0,0 +1,29 @@ + +from re import findall + + +def strip_comment_line_with_symbol(line, start): + parts = line.split(start) + counts = [len(findall(r'(?:^|[^"\\]|(?:\\\\|\\")+)(")', part)) for part in parts] + total = 0 + for nr, count in enumerate(counts): + total += count + if total % 2 == 0: + return start.join(parts[:nr+1]).rstrip() + else: + return line.rstrip() + + +def strip_comments(string, comment_symbols=frozenset(('#', '//'))): + """ + :param string: A string 
containing json with comments started by comment_symbols. + :param comment_symbols: Iterable of symbols that start a line comment (default # or //). + :return: The string with the comments removed. + """ + lines = string.splitlines() + for k in range(len(lines)): + for symbol in comment_symbols: + lines[k] = strip_comment_line_with_symbol(lines[k], start=symbol) + return '\n'.join(lines) + + diff --git a/libs/json_tricks/decoders.py b/libs/json_tricks/decoders.py new file mode 100644 index 000000000..221a8f93e --- /dev/null +++ b/libs/json_tricks/decoders.py @@ -0,0 +1,248 @@ + +from datetime import datetime, date, time, timedelta +from fractions import Fraction +from importlib import import_module +from collections import OrderedDict +from decimal import Decimal +from logging import warning +from json_tricks import NoPandasException, NoNumpyException + + +class DuplicateJsonKeyException(Exception): + """ Trying to load a json map which contains duplicate keys, but allow_duplicates is False """ + + +class TricksPairHook(object): + """ + Hook that converts json maps to the appropriate python type (dict or OrderedDict) + and then runs any number of hooks on the individual maps. + """ + def __init__(self, ordered=True, obj_pairs_hooks=None, allow_duplicates=True): + """ + :param ordered: True if maps should retain their ordering. + :param obj_pairs_hooks: An iterable of hooks to apply to elements. 
+ """ + self.map_type = OrderedDict + if not ordered: + self.map_type = dict + self.obj_pairs_hooks = [] + if obj_pairs_hooks: + self.obj_pairs_hooks = list(obj_pairs_hooks) + self.allow_duplicates = allow_duplicates + + def __call__(self, pairs): + if not self.allow_duplicates: + known = set() + for key, value in pairs: + if key in known: + raise DuplicateJsonKeyException(('Trying to load a json map which contains a' + + ' duplicate key "{0:}" (but allow_duplicates is False)').format(key)) + known.add(key) + map = self.map_type(pairs) + for hook in self.obj_pairs_hooks: + map = hook(map) + return map + + +def json_date_time_hook(dct): + """ + Return an encoded date, time, datetime or timedelta to it's python representation, including optional timezone. + + :param dct: (dict) json encoded date, time, datetime or timedelta + :return: (date/time/datetime/timedelta obj) python representation of the above + """ + def get_tz(dct): + if not 'tzinfo' in dct: + return None + try: + import pytz + except ImportError as err: + raise ImportError(('Tried to load a json object which has a timezone-aware (date)time. ' + 'However, `pytz` could not be imported, so the object could not be loaded. 
' + 'Error: {0:}').format(str(err))) + return pytz.timezone(dct['tzinfo']) + + if isinstance(dct, dict): + if '__date__' in dct: + return date(year=dct.get('year', 0), month=dct.get('month', 0), day=dct.get('day', 0)) + elif '__time__' in dct: + tzinfo = get_tz(dct) + return time(hour=dct.get('hour', 0), minute=dct.get('minute', 0), second=dct.get('second', 0), + microsecond=dct.get('microsecond', 0), tzinfo=tzinfo) + elif '__datetime__' in dct: + tzinfo = get_tz(dct) + return datetime(year=dct.get('year', 0), month=dct.get('month', 0), day=dct.get('day', 0), + hour=dct.get('hour', 0), minute=dct.get('minute', 0), second=dct.get('second', 0), + microsecond=dct.get('microsecond', 0), tzinfo=tzinfo) + elif '__timedelta__' in dct: + return timedelta(days=dct.get('days', 0), seconds=dct.get('seconds', 0), + microseconds=dct.get('microseconds', 0)) + return dct + + +def json_complex_hook(dct): + """ + Return an encoded complex number to it's python representation. + + :param dct: (dict) json encoded complex number (__complex__) + :return: python complex number + """ + if isinstance(dct, dict): + if '__complex__' in dct: + parts = dct['__complex__'] + assert len(parts) == 2 + return parts[0] + parts[1] * 1j + return dct + + +def numeric_types_hook(dct): + if isinstance(dct, dict): + if '__decimal__' in dct: + return Decimal(dct['__decimal__']) + if '__fraction__' in dct: + return Fraction(numerator=dct['numerator'], denominator=dct['denominator']) + return dct + + +class ClassInstanceHook(object): + """ + This hook tries to convert json encoded by class_instance_encoder back to it's original instance. + It only works if the environment is the same, e.g. the class is similarly importable and hasn't changed. 
+ """ + def __init__(self, cls_lookup_map=None): + self.cls_lookup_map = cls_lookup_map or {} + + def __call__(self, dct): + if isinstance(dct, dict) and '__instance_type__' in dct: + mod, name = dct['__instance_type__'] + attrs = dct['attributes'] + if mod is None: + try: + Cls = getattr((__import__('__main__')), name) + except (ImportError, AttributeError) as err: + if not name in self.cls_lookup_map: + raise ImportError(('class {0:s} seems to have been exported from the main file, which means ' + 'it has no module/import path set; you need to provide cls_lookup_map which maps names ' + 'to classes').format(name)) + Cls = self.cls_lookup_map[name] + else: + imp_err = None + try: + module = import_module('{0:}'.format(mod, name)) + except ImportError as err: + imp_err = ('encountered import error "{0:}" while importing "{1:}" to decode a json file; perhaps ' + 'it was encoded in a different environment where {1:}.{2:} was available').format(err, mod, name) + else: + if not hasattr(module, name): + imp_err = 'imported "{0:}" but could find "{1:}" inside while decoding a json file (found {2:}'.format( + module, name, ', '.join(attr for attr in dir(module) if not attr.startswith('_'))) + Cls = getattr(module, name) + if imp_err: + if 'name' in self.cls_lookup_map: + Cls = self.cls_lookup_map[name] + else: + raise ImportError(imp_err) + try: + obj = Cls.__new__(Cls) + except TypeError: + raise TypeError(('problem while decoding instance of "{0:s}"; this instance has a special ' + '__new__ method and can\'t be restored').format(name)) + if hasattr(obj, '__json_decode__'): + obj.__json_decode__(**attrs) + else: + obj.__dict__ = dict(attrs) + return obj + return dct + + +def json_set_hook(dct): + """ + Return an encoded set to it's python representation. 
+ """ + if isinstance(dct, dict): + if '__set__' in dct: + return set((tuple(item) if isinstance(item, list) else item) for item in dct['__set__']) + return dct + + +def pandas_hook(dct): + if '__pandas_dataframe__' in dct or '__pandas_series__' in dct: + # todo: this is experimental + if not getattr(pandas_hook, '_warned', False): + pandas_hook._warned = True + warning('Pandas loading support in json-tricks is experimental and may change in future versions.') + if '__pandas_dataframe__' in dct: + try: + from pandas import DataFrame + except ImportError: + raise NoPandasException('Trying to decode a map which appears to represent a pandas data structure, but pandas appears not to be installed.') + from numpy import dtype, array + meta = dct.pop('__pandas_dataframe__') + indx = dct.pop('index') if 'index' in dct else None + dtypes = dict((colname, dtype(tp)) for colname, tp in zip(meta['column_order'], meta['types'])) + data = OrderedDict() + for name, col in dct.items(): + data[name] = array(col, dtype=dtypes[name]) + return DataFrame( + data=data, + index=indx, + columns=meta['column_order'], + # mixed `dtypes` argument not supported, so use duct of numpy arrays + ) + elif '__pandas_series__' in dct: + from pandas import Series + from numpy import dtype, array + meta = dct.pop('__pandas_series__') + indx = dct.pop('index') if 'index' in dct else None + return Series( + data=dct['data'], + index=indx, + name=meta['name'], + dtype=dtype(meta['type']), + ) + return dct + + +def nopandas_hook(dct): + if isinstance(dct, dict) and ('__pandas_dataframe__' in dct or '__pandas_series__' in dct): + raise NoPandasException(('Trying to decode a map which appears to represent a pandas ' + 'data structure, but pandas support is not enabled, perhaps it is not installed.')) + return dct + + +def json_numpy_obj_hook(dct): + """ + Replace any numpy arrays previously encoded by NumpyEncoder to their proper + shape, data type and data. 
+ + :param dct: (dict) json encoded ndarray + :return: (ndarray) if input was an encoded ndarray + """ + if isinstance(dct, dict) and '__ndarray__' in dct: + try: + from numpy import asarray + import numpy as nptypes + except ImportError: + raise NoNumpyException('Trying to decode a map which appears to represent a numpy ' + 'array, but numpy appears not to be installed.') + order = 'A' + if 'Corder' in dct: + order = 'C' if dct['Corder'] else 'F' + if dct['shape']: + return asarray(dct['__ndarray__'], dtype=dct['dtype'], order=order) + else: + dtype = getattr(nptypes, dct['dtype']) + return dtype(dct['__ndarray__']) + return dct + + +def json_nonumpy_obj_hook(dct): + """ + This hook has no effect except to check if you're trying to decode numpy arrays without support, and give you a useful message. + """ + if isinstance(dct, dict) and '__ndarray__' in dct: + raise NoNumpyException(('Trying to decode a map which appears to represent a numpy array, ' + 'but numpy support is not enabled, perhaps it is not installed.')) + return dct + + diff --git a/libs/json_tricks/encoders.py b/libs/json_tricks/encoders.py new file mode 100644 index 000000000..386e690e5 --- /dev/null +++ b/libs/json_tricks/encoders.py @@ -0,0 +1,311 @@ + +from datetime import datetime, date, time, timedelta +from fractions import Fraction +from logging import warning +from json import JSONEncoder +from sys import version +from decimal import Decimal +from .utils import hashodict, call_with_optional_kwargs, NoPandasException, NoNumpyException + + +class TricksEncoder(JSONEncoder): + """ + Encoder that runs any number of encoder functions or instances on + the objects that are being encoded. + + Each encoder should make any appropriate changes and return an object, + changed or not. This will be passes to the other encoders. 
+ """ + def __init__(self, obj_encoders=None, silence_typeerror=False, primitives=False, **json_kwargs): + """ + :param obj_encoders: An iterable of functions or encoder instances to try. + :param silence_typeerror: If set to True, ignore the TypeErrors that Encoder instances throw (default False). + """ + self.obj_encoders = [] + if obj_encoders: + self.obj_encoders = list(obj_encoders) + self.silence_typeerror = silence_typeerror + self.primitives = primitives + super(TricksEncoder, self).__init__(**json_kwargs) + + def default(self, obj, *args, **kwargs): + """ + This is the method of JSONEncoders that is called for each object; it calls + all the encoders with the previous one's output used as input. + + It works for Encoder instances, but they are expected not to throw + `TypeError` for unrecognized types (the super method does that by default). + + It never calls the `super` method so if there are non-primitive types + left at the end, you'll get an encoding error. + """ + prev_id = id(obj) + for encoder in self.obj_encoders: + if hasattr(encoder, 'default'): + #todo: write test for this scenario (maybe ClassInstanceEncoder?) + try: + obj = call_with_optional_kwargs(encoder.default, obj, primitives=self.primitives) + except TypeError as err: + if not self.silence_typeerror: + raise + elif hasattr(encoder, '__call__'): + obj = call_with_optional_kwargs(encoder, obj, primitives=self.primitives) + else: + raise TypeError('`obj_encoder` {0:} does not have `default` method and is not callable'.format(encoder)) + if id(obj) == prev_id: + #todo: test + raise TypeError('Object of type {0:} could not be encoded by {1:} using encoders [{2:s}]'.format( + type(obj), self.__class__.__name__, ', '.join(str(encoder) for encoder in self.obj_encoders))) + return obj + + +def json_date_time_encode(obj, primitives=False): + """ + Encode a date, time, datetime or timedelta to a string of a json dictionary, including optional timezone. 
+ + :param obj: date/time/datetime/timedelta obj + :return: (dict) json primitives representation of date, time, datetime or timedelta + """ + if primitives and isinstance(obj, (date, time, datetime)): + return obj.isoformat() + if isinstance(obj, datetime): + dct = hashodict([('__datetime__', None), ('year', obj.year), ('month', obj.month), + ('day', obj.day), ('hour', obj.hour), ('minute', obj.minute), + ('second', obj.second), ('microsecond', obj.microsecond)]) + if obj.tzinfo: + dct['tzinfo'] = obj.tzinfo.zone + elif isinstance(obj, date): + dct = hashodict([('__date__', None), ('year', obj.year), ('month', obj.month), ('day', obj.day)]) + elif isinstance(obj, time): + dct = hashodict([('__time__', None), ('hour', obj.hour), ('minute', obj.minute), + ('second', obj.second), ('microsecond', obj.microsecond)]) + if obj.tzinfo: + dct['tzinfo'] = obj.tzinfo.zone + elif isinstance(obj, timedelta): + if primitives: + return obj.total_seconds() + else: + dct = hashodict([('__timedelta__', None), ('days', obj.days), ('seconds', obj.seconds), + ('microseconds', obj.microseconds)]) + else: + return obj + for key, val in tuple(dct.items()): + if not key.startswith('__') and not val: + del dct[key] + return dct + + +def class_instance_encode(obj, primitives=False): + """ + Encodes a class instance to json. Note that it can only be recovered if the environment allows the class to be + imported in the same way. + """ + if isinstance(obj, list) or isinstance(obj, dict): + return obj + if hasattr(obj, '__class__') and hasattr(obj, '__dict__'): + if not hasattr(obj, '__new__'): + raise TypeError('class "{0:s}" does not have a __new__ method; '.format(obj.__class__) + + ('perhaps it is an old-style class not derived from `object`; add `object` as a base class to encode it.' 
+ if (version[:2] == '2.') else 'this should not happen in Python3')) + try: + obj.__new__(obj.__class__) + except TypeError: + raise TypeError(('instance "{0:}" of class "{1:}" cannot be encoded because it\'s __new__ method ' + 'cannot be called, perhaps it requires extra parameters').format(obj, obj.__class__)) + mod = obj.__class__.__module__ + if mod == '__main__': + mod = None + warning(('class {0:} seems to have been defined in the main file; unfortunately this means' + ' that it\'s module/import path is unknown, so you might have to provide cls_lookup_map when ' + 'decoding').format(obj.__class__)) + name = obj.__class__.__name__ + if hasattr(obj, '__json_encode__'): + attrs = obj.__json_encode__() + else: + attrs = hashodict(obj.__dict__.items()) + if primitives: + return attrs + else: + return hashodict((('__instance_type__', (mod, name)), ('attributes', attrs))) + return obj + + +def json_complex_encode(obj, primitives=False): + """ + Encode a complex number as a json dictionary of it's real and imaginary part. + + :param obj: complex number, e.g. `2+1j` + :return: (dict) json primitives representation of `obj` + """ + if isinstance(obj, complex): + if primitives: + return [obj.real, obj.imag] + else: + return hashodict(__complex__=[obj.real, obj.imag]) + return obj + + +def numeric_types_encode(obj, primitives=False): + """ + Encode Decimal and Fraction. + + :param primitives: Encode decimals and fractions as standard floats. You may lose precision. If you do this, you may need to enable `allow_nan` (decimals always allow NaNs but floats do not). 
+ """ + if isinstance(obj, Decimal): + if primitives: + return float(obj) + else: + return { + '__decimal__': str(obj.canonical()), + } + if isinstance(obj, Fraction): + if primitives: + return float(obj) + else: + return hashodict(( + ('__fraction__', True), + ('numerator', obj.numerator), + ('denominator', obj.denominator), + )) + return obj + + +class ClassInstanceEncoder(JSONEncoder): + """ + See `class_instance_encoder`. + """ + # Not covered in tests since `class_instance_encode` is recommended way. + def __init__(self, obj, encode_cls_instances=True, **kwargs): + self.encode_cls_instances = encode_cls_instances + super(ClassInstanceEncoder, self).__init__(obj, **kwargs) + + def default(self, obj, *args, **kwargs): + if self.encode_cls_instances: + obj = class_instance_encode(obj) + return super(ClassInstanceEncoder, self).default(obj, *args, **kwargs) + + +def json_set_encode(obj, primitives=False): + """ + Encode python sets as dictionary with key __set__ and a list of the values. + + Try to sort the set to get a consistent json representation, use arbitrary order if the data is not ordinal. 
+ """ + if isinstance(obj, set): + try: + repr = sorted(obj) + except Exception: + repr = list(obj) + if primitives: + return repr + else: + return hashodict(__set__=repr) + return obj + + +def pandas_encode(obj, primitives=False): + from pandas import DataFrame, Series + if isinstance(obj, (DataFrame, Series)): + #todo: this is experimental + if not getattr(pandas_encode, '_warned', False): + pandas_encode._warned = True + warning('Pandas dumping support in json-tricks is experimental and may change in future versions.') + if isinstance(obj, DataFrame): + repr = hashodict() + if not primitives: + repr['__pandas_dataframe__'] = hashodict(( + ('column_order', tuple(obj.columns.values)), + ('types', tuple(str(dt) for dt in obj.dtypes)), + )) + repr['index'] = tuple(obj.index.values) + for k, name in enumerate(obj.columns.values): + repr[name] = tuple(obj.ix[:, k].values) + return repr + if isinstance(obj, Series): + repr = hashodict() + if not primitives: + repr['__pandas_series__'] = hashodict(( + ('name', str(obj.name)), + ('type', str(obj.dtype)), + )) + repr['index'] = tuple(obj.index.values) + repr['data'] = tuple(obj.values) + return repr + return obj + + +def nopandas_encode(obj): + if ('DataFrame' in getattr(obj.__class__, '__name__', '') or 'Series' in getattr(obj.__class__, '__name__', '')) \ + and 'pandas.' in getattr(obj.__class__, '__module__', ''): + raise NoPandasException(('Trying to encode an object of type {0:} which appears to be ' + 'a numpy array, but numpy support is not enabled, perhaps it is not installed.').format(type(obj))) + return obj + + +def numpy_encode(obj, primitives=False): + """ + Encodes numpy `ndarray`s as lists with meta data. + + Encodes numpy scalar types as Python equivalents. Special encoding is not possible, + because int64 (in py2) and float64 (in py2 and py3) are subclasses of primitives, + which never reach the encoder. + + :param primitives: If True, arrays are serialized as (nested) lists without meta info. 
+ """ + from numpy import ndarray, generic + if isinstance(obj, ndarray): + if primitives: + return obj.tolist() + else: + dct = hashodict(( + ('__ndarray__', obj.tolist()), + ('dtype', str(obj.dtype)), + ('shape', obj.shape), + )) + if len(obj.shape) > 1: + dct['Corder'] = obj.flags['C_CONTIGUOUS'] + return dct + elif isinstance(obj, generic): + if NumpyEncoder.SHOW_SCALAR_WARNING: + NumpyEncoder.SHOW_SCALAR_WARNING = False + warning('json-tricks: numpy scalar serialization is experimental and may work differently in future versions') + return obj.item() + return obj + + +class NumpyEncoder(ClassInstanceEncoder): + """ + JSON encoder for numpy arrays. + """ + SHOW_SCALAR_WARNING = True # show a warning that numpy scalar serialization is experimental + + def default(self, obj, *args, **kwargs): + """ + If input object is a ndarray it will be converted into a dict holding + data type, shape and the data. The object can be restored using json_numpy_obj_hook. + """ + warning('`NumpyEncoder` is deprecated, use `numpy_encode`') #todo + obj = numpy_encode(obj) + return super(NumpyEncoder, self).default(obj, *args, **kwargs) + + +def nonumpy_encode(obj): + """ + Raises an error for numpy arrays. + """ + if 'ndarray' in getattr(obj.__class__, '__name__', '') and 'numpy.' in getattr(obj.__class__, '__module__', ''): + raise NoNumpyException(('Trying to encode an object of type {0:} which appears to be ' + 'a pandas data stucture, but pandas support is not enabled, perhaps it is not installed.').format(type(obj))) + return obj + + +class NoNumpyEncoder(JSONEncoder): + """ + See `nonumpy_encode`. 
+ """ + def default(self, obj, *args, **kwargs): + warning('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`') #todo + obj = nonumpy_encode(obj) + return super(NoNumpyEncoder, self).default(obj, *args, **kwargs) + + diff --git a/libs/json_tricks/nonp.py b/libs/json_tricks/nonp.py new file mode 100644 index 000000000..6522687d3 --- /dev/null +++ b/libs/json_tricks/nonp.py @@ -0,0 +1,207 @@ + +from gzip import GzipFile +from io import BytesIO +from json import loads as json_loads +from os import fsync +from sys import exc_info, version +from .utils import NoNumpyException # keep 'unused' imports +from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports +from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, ClassInstanceEncoder, \ + json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, nonumpy_encode, NoNumpyEncoder, \ + nopandas_encode, pandas_encode # keep 'unused' imports +from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, ClassInstanceHook, \ + json_complex_hook, json_set_hook, numeric_types_hook, json_numpy_obj_hook, json_nonumpy_obj_hook, \ + nopandas_hook, pandas_hook # keep 'unused' imports +from json import JSONEncoder + + +is_py3 = (version[:2] == '3.') +str_type = str if is_py3 else (basestring, unicode,) +ENCODING = 'UTF-8' + + +_cih_instance = ClassInstanceHook() +DEFAULT_ENCODERS = [json_date_time_encode, class_instance_encode, json_complex_encode, json_set_encode, numeric_types_encode,] +DEFAULT_HOOKS = [json_date_time_hook, _cih_instance, json_complex_hook, json_set_hook, numeric_types_hook,] + +try: + import numpy +except ImportError: + DEFAULT_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS + DEFAULT_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS +else: + # numpy encode needs to be before complex + DEFAULT_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS + DEFAULT_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS + +try: + import 
pandas +except ImportError: + DEFAULT_ENCODERS = [nopandas_encode,] + DEFAULT_ENCODERS + DEFAULT_HOOKS = [nopandas_hook,] + DEFAULT_HOOKS +else: + DEFAULT_ENCODERS = [pandas_encode,] + DEFAULT_ENCODERS + DEFAULT_HOOKS = [pandas_hook,] + DEFAULT_HOOKS + + +DEFAULT_NONP_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS # DEPRECATED +DEFAULT_NONP_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS # DEPRECATED + + +def dumps(obj, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), + primitives=False, compression=None, allow_nan=False, conv_str_byte=False, **jsonkwargs): + """ + Convert a nested data structure to a json string. + + :param obj: The Python object to convert. + :param sort_keys: Keep this False if you want order to be preserved. + :param cls: The json encoder class to use, defaults to NoNumpyEncoder which gives a warning for numpy arrays. + :param obj_encoders: Iterable of encoders to use to convert arbitrary objects into json-able promitives. + :param extra_obj_encoders: Like `obj_encoders` but on top of them: use this to add encoders without replacing defaults. Since v3.5 these happen before default encoders. + :param allow_nan: Allow NaN and Infinity values, which is a (useful) violation of the JSON standard (default False). + :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). + :return: The string containing the json-encoded version of obj. + + Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order. 
+ """ + if not hasattr(extra_obj_encoders, '__iter__'): + raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`') + encoders = tuple(extra_obj_encoders) + tuple(obj_encoders) + txt = cls(sort_keys=sort_keys, obj_encoders=encoders, allow_nan=allow_nan, + primitives=primitives, **jsonkwargs).encode(obj) + if not is_py3 and isinstance(txt, str): + txt = unicode(txt, ENCODING) + if not compression: + return txt + if compression is True: + compression = 5 + txt = txt.encode(ENCODING) + sh = BytesIO() + with GzipFile(mode='wb', fileobj=sh, compresslevel=compression) as zh: + zh.write(txt) + gzstring = sh.getvalue() + return gzstring + + +def dump(obj, fp, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), + primitives=False, compression=None, force_flush=False, allow_nan=False, conv_str_byte=False, **jsonkwargs): + """ + Convert a nested data structure to a json string. + + :param fp: File handle or path to write to. + :param compression: The gzip compression level, or None for no compression. + :param force_flush: If True, flush the file handle used, when possibly also in the operating system (default False). + + The other arguments are identical to `dumps`. 
+ """ + txt = dumps(obj, sort_keys=sort_keys, cls=cls, obj_encoders=obj_encoders, extra_obj_encoders=extra_obj_encoders, + primitives=primitives, compression=compression, allow_nan=allow_nan, conv_str_byte=conv_str_byte, **jsonkwargs) + if isinstance(fp, str_type): + fh = open(fp, 'wb+') + else: + fh = fp + if conv_str_byte: + try: + fh.write(b'') + except TypeError: + pass + # if not isinstance(txt, str_type): + # # Cannot write bytes, so must be in text mode, but we didn't get a text + # if not compression: + # txt = txt.decode(ENCODING) + else: + try: + fh.write(u'') + except TypeError: + if isinstance(txt, str_type): + txt = txt.encode(ENCODING) + try: + if 'b' not in getattr(fh, 'mode', 'b?') and not isinstance(txt, str_type) and compression: + raise IOError('If compression is enabled, the file must be opened in binary mode.') + try: + fh.write(txt) + except TypeError as err: + err.args = (err.args[0] + '. A possible reason is that the file is not opened in binary mode; ' + 'be sure to set file mode to something like "wb".',) + raise + finally: + if force_flush: + fh.flush() + try: + if fh.fileno() is not None: + fsync(fh.fileno()) + except (ValueError,): + pass + if isinstance(fp, str_type): + fh.close() + return txt + + +def loads(string, preserve_order=True, ignore_comments=True, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS, + extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, **jsonkwargs): + """ + Convert a nested data structure to a json string. + + :param string: The string containing a json encoded data structure. + :param decode_cls_instances: True to attempt to decode class instances (requires the environment to be similar the the encoding one). + :param preserve_order: Whether to preserve order by using OrderedDicts or not. + :param ignore_comments: Remove comments (starting with # or //). 
+ :param decompression: True to use gzip decompression, False to use raw data, None to automatically determine (default). Assumes utf-8 encoding! + :param obj_pairs_hooks: A list of dictionary hooks to apply. + :param extra_obj_pairs_hooks: Like `obj_pairs_hooks` but on top of them: use this to add hooks without replacing defaults. Since v3.5 these happen before default hooks. + :param cls_lookup_map: If set to a dict, for example ``globals()``, then classes encoded from __main__ are looked up this dict. + :param allow_duplicates: If set to False, an error will be raised when loading a json-map that contains duplicate keys. + :param parse_float: A function to parse strings to integers (e.g. Decimal). There is also `parse_int`. + :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). + :return: The string containing the json-encoded version of obj. + + Other arguments are passed on to json_func. + """ + if not hasattr(extra_obj_pairs_hooks, '__iter__'): + raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `json_tricks.load(s)`') + if decompression is None: + decompression = string[:2] == b'\x1f\x8b' + if decompression: + with GzipFile(fileobj=BytesIO(string), mode='rb') as zh: + string = zh.read() + string = string.decode(ENCODING) + if not isinstance(string, str_type): + if conv_str_byte: + string = string.decode(ENCODING) + else: + raise TypeError(('Cannot automatically encode object of type "{0:}" in `json_tricks.load(s)` since ' + 'the encoding is not known. 
You should instead encode the bytes to a string and pass that ' + 'string to `load(s)`, for example bytevar.encode("utf-8") if utf-8 is the encoding.').format(type(string))) + if ignore_comments: + string = strip_comments(string) + obj_pairs_hooks = tuple(obj_pairs_hooks) + _cih_instance.cls_lookup_map = cls_lookup_map or {} + hooks = tuple(extra_obj_pairs_hooks) + obj_pairs_hooks + hook = TricksPairHook(ordered=preserve_order, obj_pairs_hooks=hooks, allow_duplicates=allow_duplicates) + return json_loads(string, object_pairs_hook=hook, **jsonkwargs) + + +def load(fp, preserve_order=True, ignore_comments=True, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS, + extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, **jsonkwargs): + """ + Convert a nested data structure to a json string. + + :param fp: File handle or path to load from. + + The other arguments are identical to loads. + """ + try: + if isinstance(fp, str_type): + with open(fp, 'rb') as fh: + string = fh.read() + else: + string = fp.read() + except UnicodeDecodeError as err: + # todo: not covered in tests, is it relevant? + raise Exception('There was a problem decoding the file content. A possible reason is that the file is not ' + + 'opened in binary mode; be sure to set file mode to something like "rb".').with_traceback(exc_info()[2]) + return loads(string, preserve_order=preserve_order, ignore_comments=ignore_comments, decompression=decompression, + obj_pairs_hooks=obj_pairs_hooks, extra_obj_pairs_hooks=extra_obj_pairs_hooks, cls_lookup_map=cls_lookup_map, + allow_duplicates=allow_duplicates, conv_str_byte=conv_str_byte, **jsonkwargs) + + diff --git a/libs/json_tricks/np.py b/libs/json_tricks/np.py new file mode 100644 index 000000000..676041f9f --- /dev/null +++ b/libs/json_tricks/np.py @@ -0,0 +1,28 @@ + +""" +This file exists for backward compatibility reasons. 
+""" + +from logging import warning +from .nonp import NoNumpyException, DEFAULT_ENCODERS, DEFAULT_HOOKS, dumps, dump, loads, load # keep 'unused' imports +from .utils import hashodict, NoPandasException +from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports +from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, ClassInstanceEncoder, \ + numpy_encode, NumpyEncoder # keep 'unused' imports +from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, ClassInstanceHook, \ + json_complex_hook, json_set_hook, json_numpy_obj_hook # keep 'unused' imports + +try: + import numpy +except ImportError: + raise NoNumpyException('Could not load numpy, maybe it is not installed? If you do not want to use numpy encoding ' + 'or decoding, you can import the functions from json_tricks.nonp instead, which do not need numpy.') + + +# todo: warning('`json_tricks.np` is deprecated, you can import directly from `json_tricks`') + + +DEFAULT_NP_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS # DEPRECATED +DEFAULT_NP_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS # DEPRECATED + + diff --git a/libs/json_tricks/np_utils.py b/libs/json_tricks/np_utils.py new file mode 100644 index 000000000..f2e9936d9 --- /dev/null +++ b/libs/json_tricks/np_utils.py @@ -0,0 +1,15 @@ + +""" +This file exists for backward compatibility reasons. +""" + +from .utils import hashodict, get_scalar_repr, encode_scalars_inplace +from .nonp import NoNumpyException +from . import np + +# try: +# from numpy import generic, complex64, complex128 +# except ImportError: +# raise NoNumpyException('Could not load numpy, maybe it is not installed?') + + diff --git a/libs/json_tricks/utils.py b/libs/json_tricks/utils.py new file mode 100644 index 000000000..ace85d913 --- /dev/null +++ b/libs/json_tricks/utils.py @@ -0,0 +1,81 @@ + +from collections import OrderedDict + + +class hashodict(OrderedDict): + """ + This dictionary is hashable. 
It should NOT be mutated, or all kinds of weird
	bugs may appear. This is not enforced though, it's only used for encoding.
	"""
	def __hash__(self):
		# hash the (key, value) pairs; frozenset makes the hash order-insensitive
		return hash(frozenset(self.items()))


# Pick the best introspection facility this interpreter offers:
# inspect.signature (py3.3+) > getfullargspec (py3) > getargspec (py2).
try:
	from inspect import signature
except ImportError:
	try:
		from inspect import getfullargspec
	except ImportError:
		from inspect import getargspec
		def get_arg_names(callable):
			# py2 fallback: keyword-only arguments do not exist here
			argspec = getargspec(callable)
			return set(argspec.args)
	else:
		#todo: this is not covered in test case (py 3+ uses `signature`, py2 `getfullargspec`); consider removing it
		def get_arg_names(callable):
			# include keyword-only argument names as well
			argspec = getfullargspec(callable)
			return set(argspec.args) | set(argspec.kwonlyargs)
else:
	def get_arg_names(callable):
		# set of parameter names that `callable` accepts
		sig = signature(callable)
		return set(sig.parameters.keys())


def call_with_optional_kwargs(callable, *args, **optional_kwargs):
	# Call `callable` with *args, forwarding only those keyword arguments
	# it actually accepts; unknown keywords are silently dropped.
	accepted_kwargs = get_arg_names(callable)
	use_kwargs = {}
	for key, val in optional_kwargs.items():
		if key in accepted_kwargs:
			use_kwargs[key] = val
	return callable(*args, **use_kwargs)


class NoNumpyException(Exception):
	""" Trying to use numpy features, but numpy cannot be found. """


class NoPandasException(Exception):
	""" Trying to use pandas features, but pandas cannot be found. """


def get_scalar_repr(npscalar):
	# json-serializable dict form of a numpy scalar; shape () marks it as a
	# scalar so the decoder can tell it apart from a real array
	return hashodict((
		('__ndarray__', npscalar.item()),
		('dtype', str(npscalar.dtype)),
		('shape', ()),
	))


def encode_scalars_inplace(obj):
	"""
	Searches a data structure of lists, tuples and dicts for numpy scalars
	and replaces them by their dictionary representation, which can be loaded
	by json-tricks. This happens in-place (the object is changed, use a copy).
+ """ + from numpy import generic, complex64, complex128 + if isinstance(obj, (generic, complex64, complex128)): + return get_scalar_repr(obj) + if isinstance(obj, dict): + for key, val in tuple(obj.items()): + obj[key] = encode_scalars_inplace(val) + return obj + if isinstance(obj, list): + for k, val in enumerate(obj): + obj[k] = encode_scalars_inplace(val) + return obj + if isinstance(obj, (tuple, set)): + return type(obj)(encode_scalars_inplace(val) for val in obj) + return obj + + |