如何使一个Python类序列化?
class FileItem:
def __init__(self, fname):
self.fname = fname
尝试序列化为JSON:
>>> import json
>>> x = FileItem('/foo/bar')
>>> json.dumps(x)
TypeError: Object of type 'FileItem' is not JSON serializable
如何使一个Python类序列化?
class FileItem:
def __init__(self, fname):
self.fname = fname
尝试序列化为JSON:
>>> import json
>>> x = FileItem('/foo/bar')
>>> json.dumps(x)
TypeError: Object of type 'FileItem' is not JSON serializable
当前回答
任何人都想在没有外部库的情况下使用基本转换,这只是如何使用以下方式覆盖自定义类的__iter__ & __str__函数。
class JSONCustomEncoder(json.JSONEncoder):
def default(self, obj):
return obj.__dict__
class Student:
def __init__(self, name: str, slug: str):
self.name = name
self.age = age
def __iter__(self):
yield from {
"name": self.name,
"age": self.age,
}.items()
def __str__(self):
return json.dumps(
self.__dict__, cls=JSONCustomEncoder, ensure_ascii=False
)
通过在dict()中进行包装来使用该对象,从而保留数据。
s = Student("aman", 24)
dict(s)
其他回答
我们经常在日志文件中转储JSON格式的复杂字典。虽然大多数字段携带重要信息,但我们不太关心内置的类对象(例如子进程)。Popen对象)。由于存在这些不可序列化的对象,对json.dumps()的调用会失败。
为了解决这个问题,我构建了一个小函数来转储对象的字符串表示形式,而不是转储对象本身。如果您正在处理的数据结构嵌套太多,您可以指定嵌套的最大级别/深度。
from time import time
def safe_serialize(obj , max_depth = 2):
max_level = max_depth
def _safe_serialize(obj , current_level = 0):
nonlocal max_level
# If it is a list
if isinstance(obj , list):
if current_level >= max_level:
return "[...]"
result = list()
for element in obj:
result.append(_safe_serialize(element , current_level + 1))
return result
# If it is a dict
elif isinstance(obj , dict):
if current_level >= max_level:
return "{...}"
result = dict()
for key , value in obj.items():
result[f"{_safe_serialize(key , current_level + 1)}"] = _safe_serialize(value , current_level + 1)
return result
# If it is an object of builtin class
elif hasattr(obj , "__dict__"):
if hasattr(obj , "__repr__"):
result = f"{obj.__repr__()}_{int(time())}"
else:
try:
result = f"{obj.__class__.__name__}_object_{int(time())}"
except:
result = f"object_{int(time())}"
return result
# If it is anything else
else:
return obj
return _safe_serialize(obj)
由于字典也可以有不可序列化的键,转储它们的类名或对象表示将导致所有键都具有相同的名称,这将抛出错误,因为所有键都需要有唯一的名称,这就是为什么当前时间Since epoch被int(time())附加到对象名称。
可以使用以下具有不同级别/深度的嵌套字典来测试该函数
d = {
"a" : {
"a1" : {
"a11" : {
"a111" : "some_value" ,
"a112" : "some_value" ,
} ,
"a12" : {
"a121" : "some_value" ,
"a122" : "some_value" ,
} ,
} ,
"a2" : {
"a21" : {
"a211" : "some_value" ,
"a212" : "some_value" ,
} ,
"a22" : {
"a221" : "some_value" ,
"a222" : "some_value" ,
} ,
} ,
} ,
"b" : {
"b1" : {
"b11" : {
"b111" : "some_value" ,
"b112" : "some_value" ,
} ,
"b12" : {
"b121" : "some_value" ,
"b122" : "some_value" ,
} ,
} ,
"b2" : {
"b21" : {
"b211" : "some_value" ,
"b212" : "some_value" ,
} ,
"b22" : {
"b221" : "some_value" ,
"b222" : "some_value" ,
} ,
} ,
} ,
"c" : subprocess.Popen("ls -l".split() , stdout = subprocess.PIPE , stderr = subprocess.PIPE) ,
}
执行以下命令将会得到-
print("LEVEL 3")
print(json.dumps(safe_serialize(d , 3) , indent = 4))
print("\n\n\nLEVEL 2")
print(json.dumps(safe_serialize(d , 2) , indent = 4))
print("\n\n\nLEVEL 1")
print(json.dumps(safe_serialize(d , 1) , indent = 4))
结果:
LEVEL 3
{
"a": {
"a1": {
"a11": "{...}",
"a12": "{...}"
},
"a2": {
"a21": "{...}",
"a22": "{...}"
}
},
"b": {
"b1": {
"b11": "{...}",
"b12": "{...}"
},
"b2": {
"b21": "{...}",
"b22": "{...}"
}
},
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
LEVEL 2
{
"a": {
"a1": "{...}",
"a2": "{...}"
},
"b": {
"b1": "{...}",
"b2": "{...}"
},
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
LEVEL 1
{
"a": "{...}",
"b": "{...}",
"c": "<Popen: returncode: None args: ['ls', '-l']>"
}
[注意]:仅在不关心内置类对象的序列化时使用此选项。
这个函数使用递归迭代遍历字典的每个部分,然后调用非内置类型类的repr()方法。
def sterilize(obj):
object_type = type(obj)
if isinstance(obj, dict):
return {k: sterilize(v) for k, v in obj.items()}
elif object_type in (list, tuple):
return [sterilize(v) for v in obj]
elif object_type in (str, int, bool, float):
return obj
else:
return obj.__repr__()
Jsonweb似乎是我的最佳解决方案。参见http://www.jsonweb.info/en/latest/
from jsonweb.encode import to_object, dumper
@to_object()
class DataModel(object):
def __init__(self, id, value):
self.id = id
self.value = value
>>> data = DataModel(5, "foo")
>>> dumper(data)
'{"__type__": "DataModel", "id": 5, "value": "foo"}'
我选择使用装饰器来解决datetime对象序列化问题。 这是我的代码:
#myjson.py
#Author: jmooremcc 7/16/2017
import json
from datetime import datetime, date, time, timedelta
"""
This module uses decorators to serialize date objects using json
The filename is myjson.py
In another module you simply add the following import statement:
from myjson import json
json.dumps and json.dump will then correctly serialize datetime and date
objects
"""
def json_serial(obj):
"""JSON serializer for objects not serializable by default json code"""
if isinstance(obj, (datetime, date)):
serial = str(obj)
return serial
raise TypeError ("Type %s not serializable" % type(obj))
def FixDumps(fn):
def hook(obj):
return fn(obj, default=json_serial)
return hook
def FixDump(fn):
def hook(obj, fp):
return fn(obj,fp, default=json_serial)
return hook
json.dumps=FixDumps(json.dumps)
json.dump=FixDump(json.dump)
if __name__=="__main__":
today=datetime.now()
data={'atime':today, 'greet':'Hello'}
str=json.dumps(data)
print str
通过导入上述模块,我的其他模块以正常的方式(没有指定默认关键字)使用json来序列化包含日期时间对象的数据。datetime序列化器代码会自动为json调用。Dumps和json.dump。
这对我来说很有效:
class JsonSerializable(object):
def serialize(self):
return json.dumps(self.__dict__)
def __repr__(self):
return self.serialize()
@staticmethod
def dumper(obj):
if "serialize" in dir(obj):
return obj.serialize()
return obj.__dict__
然后
class FileItem(JsonSerializable):
...
and
log.debug(json.dumps(<my object>, default=JsonSerializable.dumper, indent=2))