一个实验性但有趣的解决方案是使用元类。下面的解决方案允许使用带有简单继承的Python数据类,而完全不使用数据类装饰器。此外,它可以继承父基类的字段,而不必抱怨位置参数的顺序(非默认字段)。
from collections import OrderedDict
import typing as ty
import dataclasses
from itertools import takewhile
class DataClassTerm:
def __new__(cls, *args, **kwargs):
return super().__new__(cls)
class DataClassMeta(type):
def __new__(cls, clsname, bases, clsdict):
fields = {}
# Get list of base classes including the class to be produced(initialized without its original base classes as those have already become dataclasses)
bases_and_self = [dataclasses.dataclass(super().__new__(cls, clsname, (DataClassTerm,), clsdict))] + list(bases)
# Whatever is a subclass of DataClassTerm will become a DataClassTerm.
# Following block will iterate and create individual dataclasses and collect their fields
for base in bases_and_self[::-1]: # Ensure that last fields in last base is prioritized
if issubclass(base, DataClassTerm):
to_dc_bases = list(takewhile(lambda c: c is not DataClassTerm, base.__mro__))
for dc_base in to_dc_bases[::-1]: # Ensure that last fields in last base in MRO is prioritized(same as in dataclasses)
if dataclasses.is_dataclass(dc_base):
valid_dc = dc_base
else:
valid_dc = dataclasses.dataclass(dc_base)
for field in dataclasses.fields(valid_dc):
fields[field.name] = (field.name, field.type, field)
# Following block will reorder the fields so that fields without default values are first in order
reordered_fields = OrderedDict()
for n, t, f in fields.values():
if f.default is dataclasses.MISSING and f.default_factory is dataclasses.MISSING:
reordered_fields[n] = (n, t, f)
for n, t, f in fields.values():
if n not in reordered_fields.keys():
reordered_fields[n] = (n, t, f)
# Create a new dataclass using `dataclasses.make_dataclass`, which ultimately calls type.__new__, which is the same as super().__new__ in our case
fields = list(reordered_fields.values())
full_dc = dataclasses.make_dataclass(cls_name=clsname, fields=fields, init=True, bases=(DataClassTerm,))
# Discard the created dataclass class and create new one using super but preserve the dataclass specific namespace.
return super().__new__(cls, clsname, bases, {**full_dc.__dict__,**clsdict})
class DataClassCustom(DataClassTerm, metaclass=DataClassMeta):
def __new__(cls, *args, **kwargs):
if len(args)>0:
raise RuntimeError("Do not use positional arguments for initialization.")
return super().__new__(cls, *args, **kwargs)
现在让我们创建一个带有父数据类和混合类的样本数据类:
class DataClassCustomA(DataClassCustom):
field_A_1: int = dataclasses.field()
field_A_2: ty.AnyStr = dataclasses.field(default=None)
class SomeOtherClass:
def methodA(self):
print('print from SomeOtherClass().methodA')
class DataClassCustomB(DataClassCustomA,SomeOtherClass):
field_B_1: int = dataclasses.field()
field_B_2: ty.Dict = dataclasses.field(default_factory=dict)
结果是
result_b = DataClassCustomB(field_A_1=1, field_B_1=2)
result_b
# DataClassCustomB(field_A_1=1, field_B_1=2, field_A_2=None, field_B_2={})
result_b.methodA()
# print from SomeOtherClass().methodA
尝试在每个父类上使用@dataclass装饰器做同样的事情会在接下来的子类中引发一个异常,如TypeError(f'non-default argument <field-name)跟随默认参数')。上面的解决方案防止了这种情况的发生,因为字段首先被重新排序。然而,由于字段的顺序被修改了,在DataClassCustom中防止*args的使用。__new__是强制的,因为原来的顺序不再有效。
虽然在Python >=3.10中引入了kw_only特性,本质上使数据类中的继承更加可靠,但上面的示例仍然可以用作一种使数据类可继承的方法,而不需要使用@dataclass装饰器。