思路
- 我们用360查询电话号码,然后就会返回我们想要的电话归属地(省份、市级)以及来源分类(联通、电信、移动)
- 然后我们批量导入查询,并写入excel文档
- 接口测试1:https://www.so.com/s?ie=utf-8&q=0532-66776800
- 接口测试2:https://www.so.com/s?ie=utf-8&q=0532-66776800(注:此链接与接口测试1完全相同,疑为原文笔误,应为另一个不同的测试号码或接口)
- 案例为了分块测试代码,我们使用jupyter notebook 编写,完整代码在文末。
测试请求流程
# Target: the 360 search results page for a phone-number query; the page
# embeds the number's region (province/city) and carrier.
url=r"https://www.so.com/s?ie=utf-8&q=18529108189"
import requests
import re
# Plain GET with no headers — works for this interactive test; the final
# script below adds a User-Agent to avoid being rejected.
r=requests.get(url)
#r.encoding ="utf-8" #enable when Chinese text in the response is garbled
print(type(r)) #type: <class 'requests.models.Response'>
print(r. status_code)#HTTP status: 200, or 404, 500, etc.
print(type(r.text)) #type of the response body: <class 'str'>
print(r.cookies) #cookies: <RequestsCookieJar[<Cookie(.*)/>]>
print(r.text) #full page content
点击返回值的任意位置,按ctrl+f,输入我们测试的电话号码归属地“广州”回车,定位有我们需要的内容。
测试解析内容
from bs4 import BeautifulSoup
# Parse the HTML response with the lxml backend.
soup = BeautifulSoup(r.text, 'lxml')
# Pretty-printed copy (standard indentation); BeautifulSoup also repairs
# missing or malformed nodes while parsing. Kept here for inspection only.
r1 = soup.prettify()
# Node targeting: the <p class="mh-detail"> element on the 360 results page
# holds the number / region / carrier text.
A = soup.find_all('p', {'class': "mh-detail"})[0].get_text()
A = A.replace("\n", " ").strip()  # drop newlines, trim leading/trailing spaces
print(A)
A = A.split()  # split on whitespace into a list of fields
L = len(A)
print(L)
# The number of fields varies with how much info 360 returns; pad the
# missing slots with "未知" (unknown). r"\D" strips every non-digit from the
# number field (raw string — "\D" unescaped is an invalid escape sequence).
if L == 1:
    A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), "未知", "未知", "未知"
elif L == 2:
    A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), A[1], "未知", "未知"
elif L == 3:
    # NOTE(review): province and city both take A[1] here; looks deliberate
    # for a merged "province+city" field — confirm against real responses.
    A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), A[1], A[1], "未知"
else:
    A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), A[1], A[1], A[3]
data = [A0, A1, A2, A3]  # [number, province, city, carrier]
print(data)
ok,请求解析的测试都没问题了。当然封装代码的时候我还要考虑请求超时,号码错误等异常,此部分见文末的完整代码。
测试批量获取电话号码
import numpy as np
import pandas as pd
# Assume the phone numbers live in an Excel workbook.
# NOTE: the original passed encoding='utf-8', but pandas' read_excel has no
# such parameter (it was deprecated and then removed — .xlsx is binary, no
# text encoding applies); passing it raises TypeError on modern pandas.
df = pd.read_excel(r'D:/Case_data/telephone.xlsx')
display(df)  # Jupyter-only helper that renders the DataFrame inline
# Walk the "电话" (phone) column one number at a time.
for i in df["电话"]:
    print(i)
测试将请求解析结果写入
import csv
# Destination file for the lookup results.
file_name = "D:/Case_data/telephone_result.csv"
# Use a context manager so the handle is closed even if a write raises
# (the original open()/close() pair leaked the file on error).
# gb18030 lets Excel on a Chinese-locale Windows open the CSV correctly;
# newline='' is required by the csv module to avoid blank rows on Windows.
with open(file_name, "w+", newline='', encoding='gb18030') as f:
    writer = csv.writer(f, dialect='excel')
    # Header row first.
    writer.writerow(['电话', '省份', '市级', '分类'])
    # Then the parsed result from the previous step.
    writer.writerows([[A0, A1, A2, A3]])
构造完整代码
# 导入需要用到的库
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import csv
import time
# 请求和解析和写入
def response(url):
    """Fetch one 360 search page, parse the phone's region/carrier info
    out of it, and append one row via the module-level csv ``writer``.

    Parameters
    ----------
    url : str
        Full 360 search URL, e.g. ``https://www.so.com/s?ie=utf-8&q=<phone>``.

    Notes
    -----
    - Relies on a module-level ``writer`` (csv.writer) already being open.
    - Sleeps a random 3-10 s before each request to soften the site's
      anti-scraping throttling.
    - Any request or parse failure is reported and swallowed so a batch
      run keeps going.
    """
    try:
        # Local import, matching the file's inline-import style; the
        # original called random.randint without ever importing random.
        import random
        # Plain browser User-Agent so the request is not rejected outright.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
        }
        time.sleep(random.randint(3, 10))  # randomized delay between requests
        r = requests.get(url, headers=headers, timeout=60)
        # r.encoding = "utf-8"  # enable when Chinese text comes back garbled
        soup = BeautifulSoup(r.text, 'lxml')  # parse with the lxml backend
        # The <p class="mh-detail"> node carries the number/region/carrier text.
        A = soup.find_all('p', {'class': "mh-detail"})[0].get_text()
        A = A.replace("\n", " ").strip()  # drop newlines, trim surrounding spaces
        A = A.split()  # split on whitespace into a list of fields
        L = len(A)
        # Pad missing fields with "未知" (unknown); r"\D" strips non-digits
        # from the number (raw string fixes the invalid "\D" escape).
        if L == 1:
            A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), "未知", "未知", "未知"
        elif L == 2:
            A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), A[1], "未知", "未知"
        elif L == 3:
            A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), A[1], A[1], "未知"
        else:
            A0, A1, A2, A3 = re.sub(r"\D", "", A[0]), A[1], A[1], A[3]
        # Append one result row: [number, province, city, carrier].
        writer.writerows([[A0, A1, A2, A3]])
    except (requests.exceptions.RequestException, AttributeError, IndexError):
        # The original caught HTTPError/URLError/socket.timeout, none of
        # which were imported, so the handler itself raised NameError.
        # requests.RequestException covers timeouts, connection failures
        # and HTTP errors raised by requests.get.
        print("请求失败")
        return
# 读取数据并构造批量查询与写入
def resultsA(read_file_path):
    """Read phone numbers from an Excel file and look each one up.

    Parameters
    ----------
    read_file_path : str
        Path of an .xlsx workbook with a "电话" (phone) column.

    Side effects: one csv row is appended per number via response().
    """
    # NOTE: the original passed encoding='utf-8', but read_excel has no such
    # parameter on modern pandas (TypeError); .xlsx is binary and needs none.
    df = pd.read_excel(read_file_path)
    for i in df["电话"]:
        print(i)  # progress indicator: the number currently being queried
        url = r"https://www.so.com/s?ie=utf-8&q=" + str(i)
        response(url)  # fetch, parse and write one result row
#执行
if __name__ == '__main__':
    read_file_path = r'D:/Case_data/telephone.xlsx'     # input: phone-number workbook
    to_file_path = r"D:/Case_data/telephone_result.csv"  # output: result CSV
    # Context manager replaces the original open()/close() pair, so the
    # file is closed (and buffered rows flushed) even if resultsA raises.
    # Note: `writer` bound here is module-level, which response() relies on.
    with open(to_file_path, "w+", newline='', encoding='gb18030') as f:
        writer = csv.writer(f, dialect='excel')
        # Header row first: phone / province / city / carrier.
        writer.writerow(['电话', '省份', '市级', '分类'])
        resultsA(read_file_path)  # run the batch lookup
这就是我们的完整代码啦,不过这个请求页面的反爬机制还是相对严格,请求不了几次就不让请求了,建议大家设置请求睡眠时长和构造IP池访问。我们下一文再分享另外一个接口。