我目前正在尝试Python 3.7中引入的新数据类结构。我目前被困在试图做一些继承的父类。看起来参数的顺序被我当前的方法搞砸了,比如子类中的bool形参在其他形参之前传递。这将导致一个类型错误。

from dataclasses import dataclass

@dataclass
class Parent:
    name: str
    age: int
    ugly: bool = False

    def print_name(self):
        print(self.name)

    def print_age(self):
        print(self.age)

    def print_id(self):
        print(f'The Name is {self.name} and {self.name} is {self.age} year old')

@dataclass
class Child(Parent):
    school: str
    ugly: bool = True


jack = Parent('jack snr', 32, ugly=True)
jack_son = Child('jack jnr', 12, school = 'havard', ugly=True)

jack.print_id()
jack_son.print_id()

当我运行这段代码时,我得到这个TypeError:

TypeError: non-default argument 'school' follows default argument

我怎么解决这个问题?


当前回答

基于Martijn Pieters的解决方案,我做了以下工作:

1)创建一个实现post_init的混合

from dataclasses import dataclass

no_default = object()


@dataclass
class NoDefaultAttributesPostInitMixin:

    def __post_init__(self):
        for key, value in self.__dict__.items():
            if value is no_default:
                raise TypeError(
                    f"__init__ missing 1 required argument: '{key}'"
                )

2)然后在有继承问题的类中:

from src.utils import no_default, NoDefaultAttributesChild

@dataclass
class MyDataclass(DataclassWithDefaults, NoDefaultAttributesPostInitMixin):
    attr1: str = no_default

编辑:

一段时间后,我也发现这个解决方案与mypy的问题,下面的代码修复这个问题。

from dataclasses import dataclass
from typing import TypeVar, Generic, Union

T = TypeVar("T")


class NoDefault(Generic[T]):
    ...


NoDefaultVar = Union[NoDefault[T], T]
no_default: NoDefault = NoDefault()


@dataclass
class NoDefaultAttributesPostInitMixin:
    def __post_init__(self):
        for key, value in self.__dict__.items():
            if value is NoDefault:
                raise TypeError(f"__init__ missing 1 required argument: '{key}'")


@dataclass
class Parent(NoDefaultAttributesPostInitMixin):
    a: str = ""

@dataclass
class Child(Foo):
    b: NoDefaultVar[str] = no_default

其他回答

一个快速而肮脏的解决方案:

from typing import Optional

@dataclass
class Child(Parent):
    school: Optional[str] = None
    ugly: bool = True

    def __post_init__(self):
        assert self.school is not None

然后返回并重构一次(希望如此)扩展了语言。

你可以使用数据类的修改版本,它将生成一个只包含关键字的__init__方法:

import dataclasses


def _init_fn(fields, frozen, has_post_init, self_name):
    # fields contains both real fields and InitVar pseudo-fields.
    globals = {'MISSING': dataclasses.MISSING,
               '_HAS_DEFAULT_FACTORY': dataclasses._HAS_DEFAULT_FACTORY}

    body_lines = []
    for f in fields:
        line = dataclasses._field_init(f, frozen, globals, self_name)
        # line is None means that this field doesn't require
        # initialization (it's a pseudo-field).  Just skip it.
        if line:
            body_lines.append(line)

    # Does this class have a post-init function?
    if has_post_init:
        params_str = ','.join(f.name for f in fields
                              if f._field_type is dataclasses._FIELD_INITVAR)
        body_lines.append(f'{self_name}.{dataclasses._POST_INIT_NAME}({params_str})')

    # If no body lines, use 'pass'.
    if not body_lines:
        body_lines = ['pass']

    locals = {f'_type_{f.name}': f.type for f in fields}
    return dataclasses._create_fn('__init__',
                      [self_name, '*'] + [dataclasses._init_param(f) for f in fields if f.init],
                      body_lines,
                      locals=locals,
                      globals=globals,
                      return_type=None)


def add_init(cls, frozen):
    fields = getattr(cls, dataclasses._FIELDS)

    # Does this class have a post-init function?
    has_post_init = hasattr(cls, dataclasses._POST_INIT_NAME)

    # Include InitVars and regular fields (so, not ClassVars).
    flds = [f for f in fields.values()
            if f._field_type in (dataclasses._FIELD, dataclasses._FIELD_INITVAR)]
    dataclasses._set_new_attribute(cls, '__init__',
                       _init_fn(flds,
                                frozen,
                                has_post_init,
                                # The name to use for the "self"
                                # param in __init__.  Use "self"
                                # if possible.
                                '__dataclass_self__' if 'self' in fields
                                else 'self',
                                ))

    return cls


# a dataclass with a constructor that only takes keyword arguments
def dataclass_keyword_only(_cls=None, *, repr=True, eq=True, order=False,
              unsafe_hash=False, frozen=False):
    def wrap(cls):
        cls = dataclasses.dataclass(
            cls, init=False, repr=repr, eq=eq, order=order, unsafe_hash=unsafe_hash, frozen=frozen)
        return add_init(cls, frozen)

    # See if we're being called as @dataclass or @dataclass().
    if _cls is None:
        # We're called with parens.
        return wrap

    # We're called as @dataclass without parens.
    return wrap(_cls)

(也作为要点发布,用Python 3.6 backport测试)

这需要将子类定义为

@dataclass_keyword_only
class Child(Parent):
    school: str
    ugly: bool = True

并且会生成__init__(self, *, name:str, age:int, ugly:bool=True, school:str)(这是有效的python)。这里唯一的警告是不允许使用位置参数初始化对象,但除此之外,它是一个完全常规的数据类,没有丑陋的hack。

如果将属性从init函数中排除,则可以在父类中使用带有默认值的属性。如果您需要覆盖init的默认值,请使用Praveen Kulkarni的答案扩展代码。

from dataclasses import dataclass, field

@dataclass
class Parent:
    name: str
    age: int
    ugly: bool = field(default=False, init=False)

@dataclass
class Child(Parent):
    school: str

jack = Parent('jack snr', 32)
jack_son = Child('jack jnr', 12, school = 'havard')
jack_son.ugly = True

甚至

@dataclass
class Child(Parent):
    school: str
    ugly = True
    # This does not work
    # ugly: bool = True

jack_son = Child('jack jnr', 12, school = 'havard')
assert jack_son.ugly

当您使用Python继承创建数据类时,不能保证所有具有默认值的字段将出现在所有没有默认值的字段之后。

一个简单的解决方案是避免使用多重继承来构造“合并”数据类。相反,我们可以通过对父数据类的字段进行过滤和排序来构建合并的数据类。

试试这个merge_dataclasses()函数:

import dataclasses
import functools
from typing import Iterable, Type


def merge_dataclasses(
    cls_name: str,
    *,
    merge_from: Iterable[Type],
    **kwargs,
):
    """
    Construct a dataclass by merging the fields
    from an arbitrary number of dataclasses.

    Args:
        cls_name: The name of the constructed dataclass.

        merge_from: An iterable of dataclasses
            whose fields should be merged.

        **kwargs: Keyword arguments are passed to
            :py:func:`dataclasses.make_dataclass`.

    Returns:
        Returns a new dataclass
    """
    # Merge the fields from the dataclasses,
    # with field names from later dataclasses overwriting
    # any conflicting predecessor field names.
    each_base_fields = [d.__dataclass_fields__ for d in merge_from]
    merged_fields = functools.reduce(
        lambda x, y: {**x, **y}, each_base_fields
    )

    # We have to reorder all of the fields from all of the dataclasses
    # so that *all* of the fields without defaults appear
    # in the merged dataclass *before* all of the fields with defaults.
    fields_without_defaults = [
        (f.name, f.type, f)
        for f in merged_fields.values()
        if isinstance(f.default, dataclasses._MISSING_TYPE)
    ]
    fields_with_defaults = [
        (f.name, f.type, f)
        for f in merged_fields.values()
        if not isinstance(f.default, dataclasses._MISSING_TYPE)
    ]
    fields = [*fields_without_defaults, *fields_with_defaults]

    return dataclasses.make_dataclass(
        cls_name=cls_name,
        fields=fields,
        **kwargs,
    )

然后,您可以按照如下方式合并数据类。注意,我们可以合并A和B,默认字段B和d被移动到合并的数据类的末尾。

@dataclasses.dataclass
class A:
    a: int
    b: int = 0


@dataclasses.dataclass
class B:
    c: int
    d: int = 0


C = merge_dataclasses(
    "C",
    merge_from=[A, B],
)

# Note that 
print(C(a=1, d=1).__dict__)
# {'a': 1, 'd': 1, 'b': 0, 'c': 0}

当然,这种解决方案的缺陷是C实际上不继承A和B,这意味着您不能使用isinstance()或其他类型断言来验证C的亲本。

一个实验性但有趣的解决方案是使用元类。下面的解决方案允许使用带有简单继承的Python数据类,而完全不使用数据类装饰器。此外,它可以继承父基类的字段,而不必抱怨位置参数的顺序(非默认字段)。

from collections import OrderedDict
import typing as ty
import dataclasses
from itertools import takewhile

class DataClassTerm:
    def __new__(cls, *args, **kwargs):
        return super().__new__(cls)

class DataClassMeta(type):
    def __new__(cls, clsname, bases, clsdict):
        fields = {}

        # Get list of base classes including the class to be produced(initialized without its original base classes as those have already become dataclasses)
        bases_and_self = [dataclasses.dataclass(super().__new__(cls, clsname, (DataClassTerm,), clsdict))] + list(bases)

        # Whatever is a subclass of DataClassTerm will become a DataClassTerm. 
        # Following block will iterate and create individual dataclasses and collect their fields
        for base in bases_and_self[::-1]: # Ensure that last fields in last base is prioritized
            if issubclass(base, DataClassTerm):
                to_dc_bases = list(takewhile(lambda c: c is not DataClassTerm, base.__mro__))
                for dc_base in to_dc_bases[::-1]: # Ensure that last fields in last base in MRO is prioritized(same as in dataclasses)
                    if dataclasses.is_dataclass(dc_base):
                        valid_dc = dc_base
                    else:
                        valid_dc = dataclasses.dataclass(dc_base)
                    for field in dataclasses.fields(valid_dc):
                        fields[field.name] = (field.name, field.type, field)
        
        # Following block will reorder the fields so that fields without default values are first in order
        reordered_fields = OrderedDict()
        for n, t, f  in fields.values():
            if f.default is dataclasses.MISSING and f.default_factory is dataclasses.MISSING:
                reordered_fields[n] = (n, t, f)
        for n, t, f  in fields.values():
            if n not in reordered_fields.keys():
                reordered_fields[n] = (n, t, f)
        
        # Create a new dataclass using `dataclasses.make_dataclass`, which ultimately calls type.__new__, which is the same as super().__new__ in our case
        fields = list(reordered_fields.values())
        full_dc = dataclasses.make_dataclass(cls_name=clsname, fields=fields, init=True, bases=(DataClassTerm,))
        
        # Discard the created dataclass class and create new one using super but preserve the dataclass specific namespace.
        return super().__new__(cls, clsname, bases, {**full_dc.__dict__,**clsdict})
    
class DataClassCustom(DataClassTerm, metaclass=DataClassMeta):
    def __new__(cls, *args, **kwargs):
        if len(args)>0:
            raise RuntimeError("Do not use positional arguments for initialization.")
        return super().__new__(cls, *args, **kwargs)

现在让我们创建一个带有父数据类和混合类的样本数据类:

class DataClassCustomA(DataClassCustom):
    field_A_1: int = dataclasses.field()
    field_A_2: ty.AnyStr = dataclasses.field(default=None)

class SomeOtherClass:
    def methodA(self):
        print('print from SomeOtherClass().methodA')

class DataClassCustomB(DataClassCustomA,SomeOtherClass):
    field_B_1: int = dataclasses.field()
    field_B_2: ty.Dict = dataclasses.field(default_factory=dict)

结果是

result_b = DataClassCustomB(field_A_1=1, field_B_1=2)

result_b
# DataClassCustomB(field_A_1=1, field_B_1=2, field_A_2=None, field_B_2={})

result_b.methodA()
# print from SomeOtherClass().methodA

尝试在每个父类上使用@dataclass装饰器做同样的事情会在接下来的子类中引发一个异常,如TypeError(f'non-default argument <field-name)跟随默认参数')。上面的解决方案防止了这种情况的发生,因为字段首先被重新排序。然而,由于字段的顺序被修改了,在DataClassCustom中防止*args的使用。__new__是强制的,因为原来的顺序不再有效。

虽然在Python >=3.10中引入了kw_only特性,本质上使数据类中的继承更加可靠,但上面的示例仍然可以用作一种使数据类可继承的方法,而不需要使用@dataclass装饰器。