我的要求更严格一些:
I had to retain case info (the strings are paths to files displayed to the user, but it's a windows app so internally all operations must be case insensitive)
I needed keys to be as small as possible (it did make a difference in memory performance, chopped off 110 mb out of 370). This meant that caching lowercase version of keys is not an option.
I needed creation of the data structures to be as fast as possible (again made a difference in performance, speed this time). I had to go with a builtin
我最初的想法是用一个不区分大小写的unicode子类替换我们笨拙的Path类-但是:
事实证明很难正确-参见:python中不区分大小写的字符串类
结果是显式dict键处理使代码冗长和混乱-并且容易出错(结构体到处传递,并且不清楚它们是否有CIStr实例作为键/元素,很容易忘记加上some_dict[CIStr(路径)]是丑陋的)
所以我最后不得不写下那个不区分大小写的字典。多亏了@AaronHall编写的代码,游戏变得简单了10倍。
class CIstr(unicode):
"""See https://stackoverflow.com/a/43122305/281545, especially for inlines"""
__slots__ = () # does make a difference in memory performance
#--Hash/Compare
def __hash__(self):
return hash(self.lower())
def __eq__(self, other):
if isinstance(other, CIstr):
return self.lower() == other.lower()
return NotImplemented
def __ne__(self, other):
if isinstance(other, CIstr):
return self.lower() != other.lower()
return NotImplemented
def __lt__(self, other):
if isinstance(other, CIstr):
return self.lower() < other.lower()
return NotImplemented
def __ge__(self, other):
if isinstance(other, CIstr):
return self.lower() >= other.lower()
return NotImplemented
def __gt__(self, other):
if isinstance(other, CIstr):
return self.lower() > other.lower()
return NotImplemented
def __le__(self, other):
if isinstance(other, CIstr):
return self.lower() <= other.lower()
return NotImplemented
#--repr
def __repr__(self):
return '{0}({1})'.format(type(self).__name__,
super(CIstr, self).__repr__())
def _ci_str(maybe_str):
"""dict keys can be any hashable object - only call CIstr if str"""
return CIstr(maybe_str) if isinstance(maybe_str, basestring) else maybe_str
class LowerDict(dict):
"""Dictionary that transforms its keys to CIstr instances.
Adapted from: https://stackoverflow.com/a/39375731/281545
"""
__slots__ = () # no __dict__ - that would be redundant
@staticmethod # because this doesn't make sense as a global function.
def _process_args(mapping=(), **kwargs):
if hasattr(mapping, 'iteritems'):
mapping = getattr(mapping, 'iteritems')()
return ((_ci_str(k), v) for k, v in
chain(mapping, getattr(kwargs, 'iteritems')()))
def __init__(self, mapping=(), **kwargs):
# dicts take a mapping or iterable as their optional first argument
super(LowerDict, self).__init__(self._process_args(mapping, **kwargs))
def __getitem__(self, k):
return super(LowerDict, self).__getitem__(_ci_str(k))
def __setitem__(self, k, v):
return super(LowerDict, self).__setitem__(_ci_str(k), v)
def __delitem__(self, k):
return super(LowerDict, self).__delitem__(_ci_str(k))
def copy(self): # don't delegate w/ super - dict.copy() -> dict :(
return type(self)(self)
def get(self, k, default=None):
return super(LowerDict, self).get(_ci_str(k), default)
def setdefault(self, k, default=None):
return super(LowerDict, self).setdefault(_ci_str(k), default)
__no_default = object()
def pop(self, k, v=__no_default):
if v is LowerDict.__no_default:
# super will raise KeyError if no default and key does not exist
return super(LowerDict, self).pop(_ci_str(k))
return super(LowerDict, self).pop(_ci_str(k), v)
def update(self, mapping=(), **kwargs):
super(LowerDict, self).update(self._process_args(mapping, **kwargs))
def __contains__(self, k):
return super(LowerDict, self).__contains__(_ci_str(k))
@classmethod
def fromkeys(cls, keys, v=None):
return super(LowerDict, cls).fromkeys((_ci_str(k) for k in keys), v)
def __repr__(self):
return '{0}({1})'.format(type(self).__name__,
super(LowerDict, self).__repr__())
隐式vs显式仍然是一个问题,但一旦问题解决了,重命名属性/变量以ci开头(以及一个很大的文档注释解释ci代表大小写不敏感)我认为是一个完美的解决方案-因为代码的读者必须充分意识到我们正在处理不区分大小写的底层数据结构。
这将有望修复一些难以重现的错误,我怀疑归结为大小写敏感性。
欢迎评论/更正:)