• 如何使用多线程?


    需求:
    https://intrinio.com/tutorial/web_api
    我们通过上述网站提供的API获取了中国股市某支股票csv数据文件,现在要下载多只股票的csv数据,并将其转换为xml文件,如何使用线程来提高下载并处理的效率?
    思路:通过python中的thread来下载
    代码:

    import csv
    # The stdlib module is spelled `ElementTree`; the previous `ElemenTree` raised ImportError.
    from xml.etree.ElementTree import Element, ElementTree
    import requests
    from StringIO import StringIO
    from xml_pretty import pretty
    
    def download(url):
        """Fetch ``url`` and wrap the response body in a file-like object.

        The request is made with a 3-second timeout. Returns a ``StringIO``
        over ``response.content`` when the response is OK, otherwise ``None``.
        """
        resp = requests.get(url,timeout=3)
        if not resp.ok:
            return None
        return StringIO(resp.content)
    
    def csvToxml(scsv,fxml):
        """Convert CSV data into an XML document.

        The first CSV row supplies the element names; every following row
        becomes a ``<Row>`` element under a ``<Data>`` root, with one child
        element per column.

        Args:
            scsv: Iterable of CSV lines (e.g. a file-like object).
            fxml: Destination passed to ``ElementTree.write`` (a file object
                or a path).
        """
        reader = csv.reader(scsv)
        # The header row must be bound to the same name that is used below
        # (it was previously stored as `header` but read as `headers`).
        # Spaces are stripped because XML tag names cannot contain them.
        # NOTE(review): the replace() arguments were lost in the original
        # text; (' ', '') is the assumed intent - confirm.
        # A list (not a lazy map) is built so it can be re-used for every row.
        headers = [h.replace(' ', '') for h in next(reader)]

        root = Element("Data")
        for row in reader:
            eRow = Element("Row")
            root.append(eRow)
            for tag,text in zip(headers,row):
                e = Element(tag)
                e.text = text
                eRow.append(e)

        # `pretty` comes from the external xml_pretty module; presumably it
        # indents the tree in place - verify against that module.
        pretty(root)
        et = ElementTree(root)
        et.write(fxml)
    
    def handle(sid):
        """Download one stock's CSV data and save it as an XML file.

        ``sid`` is a numeric stock id. It is left-padded with zeros to six
        characters to form both the request URL and the output file name
        (``<code>.xml``). Returns without writing anything when ``download``
        yields ``None``.
        """
        print('Download...(%d)' % sid)
        code = str(sid).rjust(6,'0')
        source = download('http://table.finance.yahoo.com/table.csv?s=%s.sz' % code)
        if source is None:
            return

        print('convert to xml...(%d)' % sid)
        with open(code + '.xml','wb') as out:
            csvToxml(source,out)
    
    # 方法一
    from threading import Thread
    # Create a thread object that processes the first stock (sid=1).
    t = Thread(target=handle,args=(1,))
    # start() has to be called; the bare attribute reference `t.start` is a
    # no-op expression and never launches the thread.
    t.start()
    
    # 方法二
    class MyThread(Thread):
        """Thread subclass that processes a single stock id.

        The stock id is stored on the instance and handed to ``handle`` when
        the thread runs.
        """

        def __init__(self,sid):
            Thread.__init__(self) # call the parent constructor
            self.sid = sid

        def run(self):
            """Thread body: download and convert the stock given at construction."""
            handle(self.sid)
    
    # Launch one worker per stock id 1..10.
    threads = [MyThread(sid) for sid in xrange(1,11)]
    for worker in threads:
        worker.start()

    # join() blocks until the worker's run() has returned, so the message
    # below is printed only after every worker has exited.
    for worker in threads:
        worker.join()
    print('main thread')
    
    # io型操作,相当于超市订货,例如上面的download操作
    # cpu型操作,相当于超市货物搬运,例如csv转换xml文件
    # python中的线程不适合用于cpu密集型操作,原因是global interpreter lock(GIL,全局解释器锁):同一时刻只有一个线程能执行python字节码,因此python中的线程只适合处理io型的操作
    
    if __name__ == '__main__':
        # Single-stock run: download the CSV for stock 000001 and convert it.
        url = 'http://table.finance.yahoo.com/table.csv?s=000001.sz'
        rf = download(url)
        if rf:
            # The mode must be the string 'wb'; a bare `wb` is an undefined
            # name and raised NameError.
            with open('000001.xml','wb') as wf:
                csvToxml(rf,wf)
    
    =================================================================
    
    import requests
    import base64
    from io import StringIO
    import csv
    from xml.etree.ElementTree import ElementTree, Element, SubElement
    
    # Intrinio API key (matches the `api_key` value used for requests to api.intrinio.com).
    # NOTE(review): this is a credential committed in source - consider loading it from the environment.
    apikey = 'OjZlY2MzYTQwNGVlMTI3Y2VkYjMyYTZiNzJiYzdlOTFk'
    
    def download_csv(page_number, timeout=10):
        """Download one page of AAPL price data as CSV text.

        Args:
            page_number: Value substituted into the ``page_number`` query
                parameter of the request.
            timeout: Seconds to wait for the server. Without a timeout a
                stalled connection would block the calling thread (and any
                ``join()`` on it) indefinitely.

        Returns:
            A ``StringIO`` over the response text when the response is OK,
            otherwise ``None``.

        Raises:
            requests.exceptions.RequestException: On connection errors or
                when the timeout expires.
        """
        print('download csv data [page=%s]' % page_number)
        # Use the module-level key instead of repeating the literal in the URL.
        url = (
            "https://api.intrinio.com/prices.csv?api_key=%s&identifier=AAPL"
            "&page_size=20&page_number=%s&start_date=2017-09-28&end_date=2020-09-28"
            % (apikey, page_number)
        )
        response = requests.get(url, timeout=timeout)

        if response.ok:
            return StringIO(response.text)
        return None
    
    def csv_to_xml(csv_file, xml_path):
        """Convert CSV lines into a tab-indented XML file.

        The first CSV row supplies the element names. Each following row
        becomes a ``<Row>`` element under a ``<Data>`` root with one child
        per column. Whitespace is placed in ``text``/``tail`` so the written
        file is indented with tabs.

        Args:
            csv_file: Iterable of CSV lines (e.g. a ``StringIO``).
            xml_path: Destination passed to ``ElementTree.write``.
        """
        print('Convert csv data to %s' % xml_path)
        reader = csv.reader(csv_file)
        # Empty input has no header row; treat it as "no columns" so an
        # empty <Data> document is written instead of raising StopIteration.
        headers = next(reader, [])

        root = Element('Data')
        root.text = '\n\t'
        root.tail = '\n'

        for row in reader:
            book = SubElement(root, 'Row')
            book.text = '\n\t\t'
            book.tail = '\n\t'

            last = None
            for tag, text in zip(headers, row):
                last = SubElement(book, tag)
                last.text = text
                last.tail = '\n\t\t'
            # The last child closes the row, so it gets one tab less. A blank
            # CSV line produces no children; skip the fix-up in that case
            # rather than touching an undefined or stale element.
            if last is not None:
                last.tail = '\n\t'

        ElementTree(root).write(xml_path, encoding='utf8')
    
    def download_and_save(page_number, xml_path):
        """Download one page of CSV data and write it to ``xml_path`` as XML.

        Args:
            page_number: Page to request via ``download_csv``.
            xml_path: Output file path handed to ``csv_to_xml``.
        """
        # IO-bound step. download_csv returns None for a non-OK response, in
        # which case the request is repeated until it succeeds.
        csv_file = None
        while not csv_file:
            csv_file = download_csv(page_number)
        # CPU-bound step. Honour the caller's xml_path; it was previously
        # ignored and the file name rebuilt from page_number.
        csv_to_xml(csv_file, xml_path)
    
    from threading import Thread
    class MyThread(Thread):
        """Worker thread that downloads one page and saves it as XML."""

        def __init__(self, page_number, xml_path):
            Thread.__init__(self)
            self.page_number, self.xml_path = page_number, xml_path

        def run(self):
            """Thread body: run ``download_and_save`` with the stored arguments."""
            page, target = self.page_number, self.xml_path
            download_and_save(page, target)
    
    if __name__ == '__main__':
        import time

        started = time.time()

        # One worker per page 1..5, each writing data<page>.xml.
        thread_list = [MyThread(page, 'data%s.xml' % page) for page in range(1, 6)]
        for worker in thread_list:
            worker.start()

        # Wait for every worker before reporting the elapsed time.
        for worker in thread_list:
            worker.join()

        # Sequential baseline for comparison: call
        # download_and_save(i, 'data%s.xml' % i) for i in range(1, 6).
        print(time.time() - started)
        print('main thread end.')
    
    
  • 相关阅读:
    Fragment 总结
    Varnish缓存服务器的搭建配置手册
    关于页面缓存服务器的研究报告
    基于Html5的移动端开发框架的研究
    C#的Process类的一些用法
    c#中进程的使用
    C#反射(转载)
    进制的转换 以及十进制转换成x进制的代码
    算法及其复杂度
    cocos总结
  • 原文地址:https://www.cnblogs.com/Richardo-M-Q/p/13884500.html
Copyright © 2020-2023  润新知