• python mongodb MapReduce


    # -*- coding: utf-8 -*-
    import os
    import csv
    import pymongo
    from pymongo import MongoClient
    from bson.code import Code
    from pymongo import MongoClient

    # Name of the database that all of the examples below operate on.
    db_name = 'ta'

    # Open the connection to the MongoDB server.
    # Alternate server: MongoClient('10.20.66.106', 27017)
    client = MongoClient(host='10.20.4.79', port=27017)
    db = client[db_name]

    插入测试数据:

      # Seed the `test` collection with 1000 synthetic purchase documents.
      # `random` is imported here because the import header of this script
      # does not provide it; without it the loop fails with NameError.
      import random

      # `range` (rather than `xrange`) keeps the loop valid on Python 2 and 3.
      for i in range(1000):
          # Integer product id in 0..9, and a price in 0..10 rounded to 2 decimals.
          product_id = int(random.random() * 10)
          price = round(random.random() * 10, 2)

          # The buyer depends on the product id: 0-3 Joe, 4-6 Josh, 7-9 Ken.
          if product_id < 4:
              user = "Joe"
          elif product_id < 7:
              user = "Josh"
          else:
              user = "Ken"

          db.test.insert(
              {"_id": i, "user": user, "product": product_id, "price": price}
          )

      结果数据为: 

      { "_id" : 0, "price" : 5.9, "product" : 9, "user" : "Ken" }
      { "_id" : 1, "price" : 7.59, "product" : 7, "user" : "Ken" }
      { "_id" : 2, "price" : 4.72, "product" : 0, "user" : "Joe" }
      { "_id" : 3, "price" : 1.35, "product" : 1, "user" : "Joe" }
      { "_id" : 4, "price" : 2.31, "product" : 0, "user" : "Joe" }
      { "_id" : 5, "price" : 5.29, "product" : 5, "user" : "Josh" }
      { "_id" : 6, "price" : 3.34, "product" : 1, "user" : "Joe" }
      { "_id" : 7, "price" : 7.2, "product" : 4, "user" : "Josh" }
      { "_id" : 8, "price" : 8.1, "product" : 6, "user" : "Josh" }
      { "_id" : 9, "price" : 2.57, "product" : 3, "user" : "Joe" }
      { "_id" : 10, "price" : 0.54, "product" : 2, "user" : "Joe" }
      { "_id" : 11, "price" : 0.66, "product" : 1, "user" : "Joe" }
      { "_id" : 12, "price" : 5.51, "product" : 1, "user" : "Joe" }
      { "_id" : 13, "price" : 3.74, "product" : 6, "user" : "Josh" }
      { "_id" : 14, "price" : 4.82, "product" : 0, "user" : "Joe" }
      { "_id" : 15, "price" : 9.79, "product" : 3, "user" : "Joe" }
      { "_id" : 16, "price" : 9.6, "product" : 5, "user" : "Josh" }
      { "_id" : 17, "price" : 4.06, "product" : 7, "user" : "Ken" }
      { "_id" : 18, "price" : 1.37, "product" : 5, "user" : "Josh" }
      { "_id" : 19, "price" : 6.77, "product" : 9, "user" : "Ken" }

    测试1、每个用户各购买了多少个产品?
    用SQL语句实现为:select user,count(product) from test group by user

     # Test 1: count how many purchase documents each user has.
     # Map step: emit one {count: 1} value per document, keyed by its user.
     mapper = Code("function (){emit(this.user,{count:1})}")

     # Reduce step: add up the `count` field of every value emitted for a key.
     reduce = Code("".join([
         "function (key, values) {",
         " var total = 0;",
         " for (var i = 0; i < values.length; i++) {",
         " total += values[i].count;",
         " }",
         " return {count:total};",
         "}",
     ]))

     # Run the job over db.test; the output goes to the `myresults` collection.
     result = db.test.map_reduce(mapper, reduce, out='myresults')

     # Print every document of the output collection.
     for doc in db['myresults'].find():
         print(doc)

       测试 2、查询每个用户买了多少商品、总价格及平均价格,条件是价格大于5。SQL实现:

       select user,count(product),sum(price),round(sum(price)/count(product),2) as avgPrice from test where price>5 group by user


      # Test 2: for each user, count the purchases, total their price and
      # compute the average price, considering only documents with price > 5.

      # Map step: emit the price and a count of 1 per document, keyed by user.
      mapper = Code(
          "function () {"
          "  emit(this.user, {amount: this.price, count: 1});"
          "}"
      )

      # Reduce step: only accumulate the sums. MongoDB may call reduce several
      # times for one key (feeding earlier outputs back in) or not at all when
      # a key has a single value, so the returned object must have the same
      # shape as the emitted values and derived fields must not be set here.
      reduce = Code(
          "function (key, values) {"
          "  var res = {amount: 0, count: 0};"
          "  for (var i = 0; i < values.length; i++) {"
          "    res.count += values[i].count;"
          "    res.amount += values[i].amount;"
          "  }"
          "  return res;"
          "}"
      )

      # Finalize step: runs exactly once per key after all reducing is done,
      # so the average is correct even for users with a single matching
      # document. toFixed(2) keeps avgPrice as a 2-decimal string, as before.
      finalizer = Code(
          "function (key, reduced) {"
          "  reduced.avgPrice = (reduced.amount / reduced.count).toFixed(2);"
          "  return reduced;"
          "}"
      )

      # The filter is price > 5, matching the stated requirement
      # (the threshold used to be 6, contradicting it).
      result = db.test.map_reduce(
          mapper,
          reduce,
          out='myresults',
          query={'price': {'$gt': 5}},
          finalize=finalizer,
      )

      # Print every document of the output collection.
      for doc in db.myresults.find():
          print(doc)

     
  • 相关阅读:
    Windows Server 2008 R2 Enterprise 安装.NET Framework 4.0
    layer弹层content写错导致div复制了一次,导致id失效 $().val() 获取不到dispaly:none div里表单的值
    IIS 注册.NET Framework 4.0 命令
    记一次神秘又刺激的装机
    HTTP Error 503. The service is unavailable.
    找到多个与名为“Home”的控制器匹配的类型。
    Discuz论坛广告横幅大图在百度app内无法显示,百度app默认开启了广告屏蔽
    解决Antimalware Service Executable CPU,内存占用高的问题
    Discuz 部署,500 – 内部服务器错误。 您查找的资源存在问题,因而无法显示。
    IIS部署网站只有首页能访问,其他链接失效/运行.net+Access网站-可能原因:IIS未启用32位应用程序模式
  • 原文地址:https://www.cnblogs.com/shaosks/p/5685354.html
Copyright © 2020-2023  润新知