• Hadoop 的 merge 操作脚本


    import math
    import struct
    import sys
    import traceback

    import numpy as np
     
     
    def mapper():
        """Project selected columns out of tab-separated records read from stdin.

        Every input line is stripped of its trailing newline and split on tabs.
        Lines with fewer than 13 fields are skipped.  For the remaining lines,
        fields 0, 5, 10, 7 and 12 (in that order) are written to stdout as a
        single tab-joined line.
        """
        # Hard-coded input name; presumably meant to come from the job's
        # input-file setting -- TODO confirm with the job configuration.
        filepath = 'jianku_data'
        # The check does not depend on the line, so evaluate it only once.
        is_jianku_input = "jianku_data" in filepath
        for line in sys.stdin:
            if not is_jianku_input:
                # Still consume the line so stdin is read to the end as before.
                continue
            tokens = line.rstrip("\n").split('\t')
            if len(tokens) < 13:
                continue
            os_key = tokens[0]
            title = tokens[5]
            real_title = tokens[10]
            alt = tokens[7]
            ct0 = tokens[12]
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('\t'.join([os_key, title, real_title, alt, ct0]))
     
    def reducer():
        """Echo the stdin records whose key appears in the query file.

        The query file path is taken from ``sys.argv[2]``; each of its lines,
        stripped of carriage returns and newlines, is one key.  Every stdin
        line is stripped of surrounding carriage returns and newlines, and is
        printed once if its first tab-separated field equals one of the keys.

        The query file is read a single time up front and closed right away,
        instead of being re-opened and scanned for every input record.
        """
        with open(sys.argv[2], 'r') as query_file:
            # A set gives constant-time membership tests per input record.
            query_keys = {query.strip('\n\r') for query in query_file}

        for line in sys.stdin:
            line = line.strip('\r\n')
            os_key = line.split('\t')[0]
            if os_key in query_keys:
                print(line)
     
    if __name__ == '__main__':
        # The first command-line argument selects which phase to run.
        # A missing argument is treated like an unknown mode instead of
        # crashing with an IndexError.
        mode = sys.argv[1] if len(sys.argv) > 1 else None
        if mode == 'map':
            mapper()
        elif mode == 'reduce':
            reducer()
        else:
            # sys.stderr.write works on Python 2 and 3, unlike ``print >>``.
            sys.stderr.write('map or reduce, please.\n')
            # Exit non-zero so a misconfigured job is reported as failed.
            sys.exit(1)
    
  • 相关阅读:
    安卓中期小作业
    安卓大作业UI预定搞
    实验3
    实验一总结
    实验8 SQLite数据库操作
    实验6 在应用程序中播放音频和视频
    实验4 颜色、字符串资源的使用
    实验四
    实验三
    实验二
  • 原文地址:https://www.cnblogs.com/douzujun/p/15529800.html
Copyright © 2020-2023  润新知