文章目录
基本概念
-  
iteration
 -  
iterable
 -  
iterator
 -  
generator
 
迭代器
迭代器的内置方法
# Demonstrates the iterator protocol on the built-in str and list iterators.
s = "alan"
si = iter(s)  # iter() on a str returns a str_iterator
l = [1, 2]
li = iter(l)  # iter() on a list returns a list_iterator
print(si)  # <str_iterator object at 0x1061cdb70>
print(li)  # <list_iterator object at 0x10a3142e0>
print(si.__iter__())  # <str_iterator object at 0x10c899b70>  iterable objects have an __iter__() method
print(si.__next__())  # a -- an iterator also has a __next__ method
print(si.__next__())  # l
print(si.__iter__() is si)  # True: __iter__ returns the iterator itself
 
构造迭代器
# 自定义迭代器
class DataIter(object):
    """Hand-written iterator over the positional arguments it was built with.

    The object is its own iterator, so it can be walked only once: after the
    last element every further ``__next__`` call raises ``StopIteration``.
    """

    def __init__(self, *args):
        self.data = [*args]  # private copy of the values to hand out
        self.ind = 0  # index of the next value to return

    def __iter__(self):
        # An iterator returns itself from __iter__.
        return self

    def __next__(self):
        # Hand out the current element and advance; stop once the cursor
        # has reached the end of the stored data.
        if self.ind != len(self.data):
            current = self.data[self.ind]
            self.ind += 1
            return current
        raise StopIteration
# Drive the custom iterator by calling the protocol methods directly.
obj = DataIter(1, 2, 3)
print(obj.__iter__())  # <__main__.DataIter object at 0x10f3cfe20>
print(obj.__next__())  # 1
print(obj.__next__())  # 2
print(obj.__next__())  # 3
# print(obj.__next__())  # raises StopIteration: all three values are consumed
 
进阶版迭代器
"""
next函数只能向前取数据,一次取一个,不能重复取数据,那这个可不可以解决呢?
iterator只能迭代一次,但是iterable对象则没有这个限制
可以把iterator从数据中分离出来,分别定义一个iterable与iterator如下
"""
class Data(object):
    """Container that is iterable but is not itself an iterator.

    It defines ``__iter__`` only (no ``__next__``); iteration state lives in
    the ``DataIterator`` that ``__iter__`` creates.
    """

    def __init__(self, *args):
        self.data = [*args]  # the values, stored as a list

    def __iter__(self):
        # Does not return self: every call builds a new DataIterator, so the
        # same Data object can be looped over more than once.
        fresh_iterator = DataIterator(self)
        return fresh_iterator
class DataIterator(object):
    """Iterator over the ``data`` attribute of the object passed in.

    Keeps its own cursor, so several iterators can walk the same source
    independently.
    """

    def __init__(self, data):
        self.data = data.data  # same list object as the source, not a copy
        self.ind = 0  # index of the next value to return

    def __iter__(self):
        # An iterator returns itself from __iter__.
        return self

    def __next__(self):
        position = self.ind
        if position == len(self.data):
            raise StopIteration
        item = self.data[position]
        self.ind = position + 1
        return item
if __name__ == '__main__':
    # Data.__iter__ builds a new DataIterator on every call, so each of the
    # two for loops below starts again from the first element.
    d = Data(1, 2, 3)
    for x in d:
        print(x)
    for x in d:
        print(x)
    # Expected output of the two loops:
    """
    1
    2
    3
    1
    2
    3
    """
 
生成器
生成器的内置方法
# Generator built with a generator expression (parentheses around a comprehension)
a = (x for x in range(10000000))
print(next(a))  # 0
print(next(a))  # 1
print(a.__next__())  # 2
print(a.__iter__() is a)  # True: a generator is its own iterator
print(iter(a) is a)  # True
# A generator can also be built with the yield keyword
 
构造生成器①-“元组生成式”
a = [x for x in range(3)]  # square brackets: list comprehension
type(a)  # list
a = (x for x in range(3))  # parentheses: generator expression, not a tuple
type(a)  # generator
 
比较列表生成式和生成器的耗时
import time
# Time sum() over a list comprehension, which builds the whole list first.
tic = time.time()
a = sum([x for x in range(10000000)])
toc = time.time()
print(toc - tic)  # 2.3462159633636475 (sample value; varies per run and machine)
# Time sum() over a generator expression, which produces values one at a time.
tic = time.time()
a = sum((x for x in range(10000000)))
toc = time.time()
print(toc - tic)  # 0.847653865814209 (sample value; varies per run and machine)
 
构造生成器②-“yield关键字”
def num():
    """Generator that yields 0 through 4, printing progress messages.

    Prints a start message when first advanced and a continue message each
    time it is resumed after a yield.
    """
    print('开始执行')
    value = 0
    while value < 5:
        yield value
        # Runs only when the consumer asks for the next value.
        print('继续执行')
        value += 1
mygen = num()  # calling a generator function returns a generator object
print(type(mygen))  # <class 'generator'>


# Fetch one value with next(); the returned value (0) is discarded here
next(mygen)
# Fetch the remaining values with a for loop
for step in mygen:
    print(step)
 
实操
def get_table(**kwargs):
    """Build a ``Table`` object for a database table by autoloading it.

    Keyword Args:
        schema: Schema name forwarded to ``Table``; ``None`` when omitted.
        table: Name of the table to load.

    Returns:
        The ``Table`` created for ``table``. If loading under the given name
        fails, the lower-cased name is tried once; an error from that second
        attempt propagates to the caller.
    """
    engine = db_client.datacenter_orm.get_engine()
    schema = kwargs.get('schema')
    table_name = kwargs.get('table')
    metadata = MetaData(bind=engine)
    try:
        table_item = Table(table_name, metadata, autoload=True, schema=schema)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed by the retry.
        # NOTE(review): presumably the expected failure is a table-not-found
        # error caused by name casing -- confirm and narrow further.
        table_item = Table(table_name.lower(), metadata, autoload=True, schema=schema)
    return table_item
def get_batch(table_model, task_info):
    """Yield the rows of ``table_model`` in ordered, fixed-size batches.

    Args:
        table_model: Table object handed to ``get_model_field`` to resolve
            column names.
        task_info: Mapping with optional keys ``"sort_field"`` (column name
            to order by, default ``""``), ``"common_field"`` (list of column
            names to select, default ``[]``) and ``"batch_size"`` (rows per
            query, default 1000).

    Yields:
        One list per query, holding ``row2dict(row)`` for every row fetched.
        The first batch shorter than ``batch_size`` ends paging, and a final
        empty list is always yielded after it as an end marker.

    Raises:
        ValueError: If ``batch_size`` is not positive (the paging loop could
            never advance).
    """
    sort_field = task_info.get("sort_field", "")
    common_field = task_info.get("common_field", [])
    batch_size = task_info.get("batch_size", 1000)
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    fields = tuple(get_model_field(table_model, f) for f in common_field)
    limit_offset = 0
    while True:
        # A new session per batch; it is closed before the batch is handed
        # to the consumer, so no session stays open while the caller works.
        session = db_client.datacenter_orm.new_session()
        try:
            items = (
                session.query(*fields)
                .order_by(get_model_field(table_model, sort_field))
                .offset(limit_offset)
                .limit(batch_size)
                .all()
            )
            item_dict = [row2dict(item) for item in items]
        finally:
            # Close even when the query or the row conversion raises.
            session.close()
        yield item_dict
        if len(item_dict) < batch_size:
            break
        limit_offset += len(item_dict)
    # Trailing empty batch kept for callers that use it as an end marker.
    yield []
# Example driver: select the "id" and "name" columns ordered by "id".
# "batch_size" is omitted, so get_batch falls back to its default of 1000.
task_info = {
    "common_field": ["id", "name"],
    "sort_field": "id",
}
# get_table accepts keyword arguments only, so the config mapping must be
# unpacked; passing it positionally raises TypeError.
# NOTE(review): assumes self.conf is a mapping with "schema" and "table"
# keys -- confirm against the enclosing class.
y_data = get_batch(get_table(**self.conf), task_info)
for data_column in y_data:
    # Each item is one batch (a list); the last one is always empty.
    ...









