import re
# A literal pattern such as "he" matches exactly that character sequence.
# re.match() only tries the pattern at the very beginning of the string.
text = "hello"
ret = re.match("he", text)
matched_text, matched_span = ret.group(), ret.span()
print(matched_text)  # console: he
print(matched_span)  # console: (0, 2)

# ".": matches any single character (by default, anything except a newline).
# One dot consumes one character, two dots consume two.
text = "+hello"
for dot_pattern in (".", ".."):
    ret = re.match(dot_pattern, text)
    print(ret.group())  # console: "+" on the first pass, "+h" on the second
# r"\d": matches one decimal digit.
# Patterns containing a backslash are written as raw strings: in a plain
# literal "\d" is an invalid escape sequence, which current Python versions
# warn about, so the raw form is the only spelling that is safe long-term.
text = "123"
ret = re.match(r"\d", text)
print(ret.group())  # console: 1
ret = re.match(r"\d\d", text)
print(ret.group())  # console: 12

# "[0-9]": the bracket (character-set) spelling of an ASCII digit.
text = "09"
ret = re.match("[0-9]", text)
print(ret.group())  # console: 0
ret = re.match("[0-9][0-9]", text)
print(ret.group())  # console: 09

# r"\D": matches one character that is NOT a digit.
text = "+"
ret = re.match(r"\D", text)
print(ret.group())  # console: +

# "[^0-9]": the bracket spelling of a non-digit; a leading "^" inside the
# brackets negates the set, so this matches any character except 0-9.
text = "a"
ret = re.match("[^0-9]", text)
print(ret.group())  # console: a
# r"\s": matches one whitespace character (e.g. "\r", "\n", "\t" or a space).
# The pattern is a raw string so the backslash reaches the regex engine
# unchanged; the subject "\r" below is an ordinary (non-raw) literal on
# purpose, because there we do want Python to produce a carriage return.
text = "\r"
ret = re.match(r"\s", text)
print(ret.group())  # console: a carriage return character

# r"\w": matches one "word" character: a-z, A-Z, digits and underscore.
text = "_"
ret = re.match(r"\w", text)
print(ret.group())  # console: _

# "[a-zA-Z0-9_]": the bracket spelling of an ASCII word character.
text = "_"
ret = re.match("[a-zA-Z0-9_]", text)
print(ret.group())  # console: _

# r"\W": the complement of \w - one character that is not a word character.
text = "-"
ret = re.match(r"\W", text)
print(ret.group())  # console: -

# "[^a-zA-Z0-9_]": the bracket spelling of a non-word character.
text = "-"
ret = re.match("[^a-zA-Z0-9_]", text)
print(ret.group())  # console: -
# A bracket set lists alternatives: "[a1]" matches a single character that is
# either "a" or "1", so both sample strings below match.
text_a = "a"
text_1 = "1"
for candidate in (text_a, text_1):
    ret = re.match("[a1]", candidate)
    print(ret.group())  # console: "a" on the first pass, "1" on the second
# "*": repeats the preceding item zero or more times, so r"\d*" matches the
# (possibly empty) run of digits at the start of the string.
# All patterns here are raw strings so that "\d" / "\w" are not treated as
# (invalid) Python string escapes.
text = "1234ab"
ret = re.match(r"\d*", text)
print(ret.group())  # console: 1234

# "+": repeats the preceding item one or more times.
text_abcd = "abcd"
ret = re.match(r"\w+", text_abcd)
print(ret.group())  # console: abcd
text_abc = "abc"
ret = re.match(r"\w+", text_abc)
print(ret.group())  # console: abc

# "{m}": repeats the preceding item exactly m times.
text = "abcd"
ret = re.match(r"\w{3}", text)
print(ret.group())  # console: abc

# "{m,n}": repeats the preceding item between m and n times, as many as
# possible (greedy). Note there is no space after the comma in the pattern.
text = "abcdefg"
ret = re.match(r"\w{1,3}", text)
print(ret.group())  # console: abc
# Validation examples.
# re.fullmatch() is used instead of re.match(): match() only anchors the
# start, so it would also accept inputs with trailing garbage (for example a
# phone number followed by extra characters). fullmatch() requires the whole
# string to fit the pattern, which is what "validation" means.
# Patterns are raw strings so "\d", "\w", "\s" and "\." are not parsed as
# (invalid) Python string escapes.

# Mobile phone number: "1", a second digit from 3/4/5/6, then nine digits.
text = "15541216886"
ret = re.fullmatch(r"1[3456]\d{9}", text)
print(ret.group())  # console: 15541216886

# E-mail address (QQ mailbox sample): word characters, "@", a lowercase
# alphanumeric domain label, a literal dot, and a lowercase suffix.
text = "hynever12_@qq.com"
ret = re.fullmatch(r"\w+@[a-z0-9]+\.[a-z]+", text)
print(ret.group())  # console: hynever12_@qq.com

# URL: one of the schemes http/https/ftp, "://", then any non-whitespace run.
text = "https://baike.baidu.com/item/python/407313?fr=aladdin"
ret = re.fullmatch(r"(http|https|ftp)://[^\s]+", text)
print(ret.group())  # console: https://baike.baidu.com/item/python/407313?fr=aladdin

# ID card number: 17 digits followed by a final digit or the letter x/X.
text = "21030220050617063X"
ret = re.fullmatch(r"\d{17}[\dxX]", text)
print(ret.group())  # console: 21030220050617063X
# "^": anchors the pattern at the start of the string. With re.search() this
# makes the pattern match only if the string begins with "h".
text = "hello"
ret = re.search("^h", text)
print(ret.group())  # console: h

# "$": anchors the pattern at the end of the string ("ends with ...").
# The dot in the domain is escaped: an unescaped "." matches ANY character,
# so the old pattern would also have accepted e.g. "xxx@163Xcom".
text = "xxx@163.com"
ret = re.match(r"\w+@163\.com$", text)
print(ret.group())  # console: xxx@163.com

# Match an integer from 1 to 100.
# "100" is listed first and the whole string must match: with the previous
# "[1-9]\d?|100" + re.match(), the input "100" only matched "10" and inputs
# such as "990" were accepted as "99".
# "?" makes the second digit optional, covering 1-9 as well as 10-99.
text = "99"
ret = re.fullmatch(r"(100|[1-9]\d?)", text)
print(ret.group())  # console: 99
# Capturing several pieces of one string with groups.
# Patterns are raw strings so "\$" and "\d" are not parsed as (invalid)
# Python string escapes; "\$" is a literal dollar sign, not the end anchor.
text = "apple's price $99, orange's price is $10"
ret = re.search(r".*(\$\d+).*(\$\d+)", text)
print(ret.group())  # console: apple's price $99, orange's price is $10
print(ret.group(1))  # console: $99
print(ret.group(2))  # console: $10
print(ret.group(1, 2))  # console: ('$99', '$10')
print(ret.groups())  # console: ('$99', '$10')

# re.findall(): returns every non-overlapping match as a list.
ret = re.findall(r"\$\d+", text)
print(ret)  # console: ['$99', '$10']

# re.sub(): replaces every match with the given replacement string.
ret = re.sub(r"\$\d+", "0", text)
print(ret)  # console: apple's price 0, orange's price is 0
# Splitting a string with re.split().
# First pattern: split on either a space or "&".
# Second pattern: split on any character that is not an ASCII letter.
text = "hello&world ni hao"
for separator_pattern in (" |&", "[^a-zA-Z]"):
    ret = re.split(separator_pattern, text)
    print(ret)  # console: ['hello', 'world', 'ni', 'hao'] for both patterns
# re.compile(): build a reusable pattern object.
# With re.VERBOSE, whitespace inside the pattern is ignored and "#" starts an
# in-pattern comment, so the expression can be laid out one piece per line:
# integer digits, an optional decimal point, then optional fraction digits.
text = "the number is 20.50"
r = re.compile(
    r"""
\d+ # 小数点前面的数字
\.? # 小数点自己
\d* # 小数点后面的数字
""",
    re.VERBOSE,
)
# Search through the compiled pattern's own method.
ret = r.search(text)
print(ret.group())  # console: 20.50