is_int('3.14') == False
is_int('-7') == True
is_int('3.14') == False
is_int('-7') == True
正确的RegEx解决方案应该结合Greg Hewgill和Nowell的思想,但不使用全局变量。可以通过将属性附加到方法来实现这一点。另外,我知道在方法中导入是不受欢迎的,但我想要的是像http://peak.telecommunity.com/DevCenter/Importing#lazy-imports这样的“惰性模块”效果
#!/usr/bin/env python
# Uses exclusively methods of the String object
def isInteger(i):
    """Return True when ``str(i)`` spells a whole number.

    Accepted form: one optional leading ``+`` or ``-``, one or more digits,
    and optionally a ``.`` followed by nothing but zeros (``'7'``, ``'-7'``,
    ``'+7.000'``, ``'0.'``).  Only ``str`` methods are used.

    Args:
        i: Any object; it is converted with ``str()`` before being checked.

    Returns:
        bool: True if the text has the form above, otherwise False.
    """
    text = str(i)
    # Drop at most one sign character; stripping every leading '-'/'+'
    # would let '--1' or '+-1' through.
    if text[:1] in ('-', '+'):
        text = text[1:]
    # Split on the first dot only: any further dot lands in `fraction` and
    # fails the all-zeros check, so '1.0.0' and '1..' are rejected.
    whole, _, fraction = text.partition('.')
    # Zeros are stripped from the fraction alone, never from the integer
    # digits, so '00' and '-0' still count as integers.
    return whole.isdigit() and fraction.strip('0') == ''
# Uses re module for regex
def isIntegre(i):
    """Return True when ``str(i)`` spells a whole number (regex variant).

    Accepted form: an optional single sign, one or more digits, and
    optionally a ``.`` followed by zero or more ``0`` characters.

    The compiled pattern is stored as an attribute on the function itself,
    so it is built on the first call only and no module-level global is
    needed.

    Args:
        i: Any object; it is converted with ``str()`` before matching.

    Returns:
        bool: True if the whole text matches, otherwise False.
    """
    # Imported inside the function on purpose, so the module does not need
    # `re` until this function is actually called.
    import re
    if not hasattr(isIntegre, '_re'):
        print("I compile only once. Remove this line when you are confident in that.")
        # \Z anchors at the true end of the string; '$' would also match
        # just before a trailing newline and accept '1\n'.
        isIntegre._re = re.compile(r"[-+]?\d+(\.0*)?\Z")
    return isIntegre._re.match(str(i)) is not None
# When executed directly run Unit Tests
if __name__ == '__main__':
    # Run both checkers over the same inputs and print one row per input so
    # their verdicts can be compared side by side.
    for obj in [
        # integers
        0, 1, -1, 1.0, -1.0,
        '0', '0.', '0.0', '1', '-1', '+1', '1.0', '-1.0', '+1.0',
        # non-integers
        1.1, -1.1, '1.1', '-1.1', '+1.1',
        '1.1.1', '1.1.0', '1.0.1', '1.0.0',
        '1.0.', '1..0', '1..',
        '0.0.', '0..0', '0..',
        'one', object(), (1, 2, 3), [1, 2, 3], {'one': 'two'},
    ]:
        # Notice the integre uses 're' (intended to be humorous)
        integer = ('an integer' if isInteger(obj) else 'NOT an integer')
        integre = ('an integre' if isIntegre(obj) else 'NOT an integre')
        # Make strings look like strings in the output
        if isinstance(obj, str):
            obj = ("'%s'" % (obj,))
        print("%30s is %14s is %14s" % (obj, integer, integre))
I compile only once. Remove this line when you are confident in that.
0 is an integer is an integre
1 is an integer is an integre
-1 is an integer is an integre
1.0 is an integer is an integre
-1.0 is an integer is an integre
'0' is an integer is an integre
'0.' is an integer is an integre
'0.0' is an integer is an integre
'1' is an integer is an integre
'-1' is an integer is an integre
'+1' is an integer is an integre
'1.0' is an integer is an integre
'-1.0' is an integer is an integre
'+1.0' is an integer is an integre
1.1 is NOT an integer is NOT an integre
-1.1 is NOT an integer is NOT an integre
'1.1' is NOT an integer is NOT an integre
'-1.1' is NOT an integer is NOT an integre
'+1.1' is NOT an integer is NOT an integre
'1.1.1' is NOT an integer is NOT an integre
'1.1.0' is NOT an integer is NOT an integre
'1.0.1' is NOT an integer is NOT an integre
'1.0.0' is NOT an integer is NOT an integre
'1.0.' is NOT an integer is NOT an integre
'1..0' is NOT an integer is NOT an integre
'1..' is NOT an integer is NOT an integre
'0.0.' is NOT an integer is NOT an integre
'0..0' is NOT an integer is NOT an integre
'0..' is NOT an integer is NOT an integre
'one' is NOT an integer is NOT an integre
<object object at 0x103b7d0a0> is NOT an integer is NOT an integre
(1, 2, 3) is NOT an integer is NOT an integre
[1, 2, 3] is NOT an integer is NOT an integre
{'one': 'two'} is NOT an integer is NOT an integre
#!/usr/bin/env python
def get_int(number):
    """Parse *number* as an integer, tolerating signs and a zero fraction.

    Any run of leading ``+`` / ``-`` characters is accepted and folded into
    the result (each ``-`` flips the sign), and a fractional part is allowed
    as long as it consists only of zeros (``'1.00'`` -> 1, ``'--3.'`` -> 3).

    Args:
        number: The text to parse (a ``str``).

    Returns:
        The parsed ``int``, or ``None`` when the text is not an integer.
        ``None`` is the only failure value, so a ``0`` result can be told
        apart from a rejection with ``is None``.
    """
    splits = number.split('.')
    if len(splits) > 2:
        # more than one decimal point
        return None
    if len(splits) == 2 and splits[1]:
        fraction = splits[1]
        # The fraction must be digits that are all zero. Checking the digits
        # directly rejects inputs such as '1.abc' and '1.-0'.
        if not fraction.isdigit() or fraction.strip('0'):
            return None
    int_part = splits[0].lstrip("+")
    if int_part.startswith('-'):
        # handle minus sign recursively, propagating a failed parse instead
        # of multiplying it by -1
        inner = get_int(int_part[1:])
        return None if inner is None else -inner
    if not int_part.isdigit():
        return None
    return int(int_part)
# Sample inputs covering plain, signed, fractional and multi-sign spellings.
tests = ["0", "0.0", "0.1", "1", "1.1", "1.0", "-1", "-1.1", "-1.0", "-0", "--0", "---3", '.3', '--3.', "+13", "+-1.00", "--+123", "-0.000"]
for t in tests:
    # print() with one pre-formatted argument works on Python 2 and 3 alike;
    # the bare print statement is a SyntaxError on Python 3.
    print("get_int(%s) = %s" % (t, get_int(str(t))))
get_int(0) = 0
get_int(0.0) = 0
get_int(0.1) = None
get_int(1) = 1
get_int(1.1) = None
get_int(1.0) = 1
get_int(-1) = -1
get_int(-1.1) = None
get_int(-1.0) = -1
get_int(-0) = 0
get_int(--0) = 0
get_int(---3) = -3
get_int(.3) = None
get_int(--3.) = 3
get_int(+13) = 13
get_int(+-1.00) = -1
get_int(--+123) = 123
get_int(-0.000) = 0
def int_predicate(number):
    """Return True when ``get_int(number)`` yields anything other than None."""
    parsed = get_int(number)
    return parsed is not None
>>> '16'.isdigit()
True
>>> s = '-17'
>>> s.startswith('-') and s[1:].isdigit()
True
def check_int(s):
    """Return True if *s* is an optionally signed run of digits.

    Args:
        s: The string to check.

    Returns:
        bool: True for strings such as ``'16'``, ``'-17'`` or ``'+3'``;
        False otherwise, including for the empty string and a lone sign.
    """
    # startswith() is safe on an empty string, whereas indexing s[0] raises
    # IndexError when s == ''.
    if s.startswith(('-', '+')):
        return s[1:].isdigit()
    return s.isdigit()
我们谈论的是整数(不是小数/浮点数); 内置int()的行为是我们的标准(有时很奇怪:“-00”是它的正确输入)
def is_int_str(string):
    """Tell whether *string* is a digit run with at most one leading sign.

    Args:
        string: The text to examine.

    Returns:
        bool: True for inputs like ``'1'``, ``'-1'``, ``'+1'``, ``'001'``;
        False for ``'--1'``, ``'-'``, ``''`` and anything non-numeric.
    """
    # Skip one leading sign character, if present, then test what is left.
    has_sign = string.startswith(('-', '+'))
    digits = string[1:] if has_sign else string
    return digits.isdigit()
我已经测试了3个主要变体(1)try/except, (2) re.match()和(3)字符串操作(见上文)。第三个变体比try/except和re.match()快两倍。顺便说一句:regex变体是最慢的!请参见下面的测试脚本。
import re
import time
def test(func, test_suite):
for test_case in test_suite:
actual_result = func(*test_case[0])
expected_result = test_case[1]
assert (
actual_result == expected_result
), f'Expected: {expected_result} but actual: {actual_result}'
def perf(func, test_suite, iterations=1_000_000):
    """Time repeated runs of ``test(func, test_suite)``.

    Args:
        func: The callable being benchmarked.
        test_suite: The cases handed to ``test`` on every iteration.
        iterations: How many times the whole suite is run. Defaults to
            1,000,000, the count that was previously hard-coded.

    Returns:
        float: Elapsed time in seconds for all iterations.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for
    # measuring intervals; time.time() can jump if the system clock is
    # adjusted during the run.
    start = time.perf_counter()
    for _ in range(iterations):
        test(func, test_suite)
    return time.perf_counter() - start
def is_int_str_1(string):
    """Return True if the built-in ``int()`` can parse *string* (try/except variant).

    Args:
        string: The text to check.

    Returns:
        bool: True when ``int(string)`` succeeds, False when it raises
        ``ValueError``.
    """
    try:
        # Only the conversion outcome matters; the parsed value is discarded.
        int(string)
        return True
    except ValueError:
        return False
def is_int_str_2(string):
    """Return True if *string* matches the signed-digits pattern (regex variant).

    Args:
        string: The text to check.

    Returns:
        bool: True when the pattern (optional single sign, then one or more
        digits) matches, otherwise False.
    """
    found = re.match(r'^[\-+]?\d+$', string)
    return found is not None
def is_int_str_3(string):
    """Return True if *string* is a digit run with at most one leading sign.

    This is the pure string-method variant.

    Args:
        string: The text to check.

    Returns:
        bool: True for ``'1'``, ``'-1'``, ``'+1'``, ``'-00'``; False for
        ``'--1'``, ``'-'`` and non-numeric text.
    """
    # Unsigned case first: all characters are digits.
    if string.isdigit():
        return True
    # Otherwise accept exactly one sign followed by digits.
    return string.startswith(('-', '+')) and string[1:].isdigit()
# Behavior of built-in int() function is a standard for the following tests
# Each case is [positional_args, expected_result].
test_suite = [
    [['1'], True],  # func('1') -> True
    [['-1'], True],
    [['+1'], True],
    [['--1'], False],
    [['++1'], False],
    [['001'], True],  # because int() can read it
    [['-00'], True],  # because of quite strange behavior of int()
    [['-'], False],
    [['abracadabra'], False],
    [['57938759283475928347592347598357098458405834957984755200000000'], True],
]
# Benchmark the three variants against the same test_suite. All three
# measurements are taken before anything is printed.
time_span_1 = perf(is_int_str_1, test_suite)
time_span_2 = perf(is_int_str_2, test_suite)
time_span_3 = perf(is_int_str_3, test_suite)
# Report one line per variant, labelled with the function's own name and the
# elapsed seconds returned by perf().
print(f'{is_int_str_1.__name__}: {time_span_1} seconds')
print(f'{is_int_str_2.__name__}: {time_span_2} seconds')
print(f'{is_int_str_3.__name__}: {time_span_3} seconds')
is_int_str_1: 4.314162969589233 seconds
is_int_str_2: 5.7216269969940186 seconds
is_int_str_3: 2.5828163623809814 seconds
>>> "+7".lstrip("-+").isdigit()
True
>>> "-7".lstrip("-+").isdigit()
True
>>> "7".lstrip("-+").isdigit()
True
>>> "13.4".lstrip("-+").isdigit()
False
def is_int(val):
    """Return True if *val* is a digit string with at most one leading sign.

    Args:
        val: The string to check.

    Returns:
        bool: True for ``'7'``, ``'-7'``, ``'+7'``; False for ``'13.4'``,
        ``'--7'``, ``'+-7'``, ``'-'`` and the empty string.
    """
    # Remove a single sign only. lstrip("-+") strips every leading sign
    # character and would accept '--7' or '+-+7'.
    digits = val[1:] if val.startswith(("-", "+")) else val
    return digits.isdigit()