• GraphX之Pregel(BSP模型-消息传递机制)学习


    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *    http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    package org.apache.spark.graphx
    
    import scala.reflect.ClassTag
    import org.apache.spark.Logging
    
    
    /**
     * Implements a Pregel-like bulk-synchronous message-passing API.
     *
     * Unlike the original Pregel API, the GraphX Pregel API factors the sendMessage computation over
     * edges, enables the message sending computation to read both vertex attributes, and constrains
     * messages to the graph structure.  These changes allow for substantially more efficient
     * distributed execution while also exposing greater flexibility for graph-based computation.
     *
     * @example We can use the Pregel abstraction to implement PageRank:
     * {{{
     * val pagerankGraph: Graph[Double, Double] = graph
     *   // Associate the degree with each vertex
     *   .outerJoinVertices(graph.outDegrees) {
     *     (vid, vdata, deg) => deg.getOrElse(0)
     *   }
     *   // Set the weight on the edges based on the degree
     *   .mapTriplets(e => 1.0 / e.srcAttr)
     *   // Set the vertex attributes to the initial pagerank values
     *   .mapVertices((id, attr) => 1.0)
     *
     * def vertexProgram(id: VertexId, attr: Double, msgSum: Double): Double =
     *   resetProb + (1.0 - resetProb) * msgSum
     * // Note: the send function takes only the edge triplet, matching the `sendMsg` parameter.
     * def sendMessage(edge: EdgeTriplet[Double, Double]): Iterator[(VertexId, Double)] =
     *   Iterator((edge.dstId, edge.srcAttr * edge.attr))
     * def messageCombiner(a: Double, b: Double): Double = a + b
     * val initialMessage = 0.0
     * // Execute Pregel for a fixed number of iterations.
     * Pregel(pagerankGraph, initialMessage, numIter)(
     *   vertexProgram, sendMessage, messageCombiner)
     * }}}
     *
     */
    object Pregel extends Logging {

      /**
       * Execute a Pregel-like iterative vertex-parallel abstraction.  The
       * user-defined vertex-program `vprog` is executed in parallel on
       * each vertex receiving any inbound messages and computing a new
       * value for the vertex.  The `sendMsg` function is then invoked on
       * all out-edges and is used to compute an optional message to the
       * destination vertex. The `mergeMsg` function is a commutative
       * associative function used to combine messages destined to the
       * same vertex.
       *
       * On the first iteration all vertices receive the `initialMsg` and
       * on subsequent iterations if a vertex does not receive a message
       * then the vertex-program is not invoked.
       *
       * This function iterates until there are no remaining messages, or
       * for `maxIterations` iterations.
       *
       * All message RDDs cached by this method, including the one produced
       * by the last iteration, are unpersisted before it returns; only the
       * returned graph is left cached.
       *
       * @tparam VD the vertex data type
       * @tparam ED the edge data type
       * @tparam A the Pregel message type
       *
       * @param graph the input graph.
       *
       * @param initialMsg the message each vertex will receive on the
       * first iteration
       *
       * @param maxIterations the maximum number of iterations to run for.
       * If this is not positive, only the initial application of `vprog`
       * with `initialMsg` is performed.
       *
       * @param activeDirection the direction of edges incident to a vertex that received a message in
       * the previous round on which to run `sendMsg`. For example, if this is `EdgeDirection.Out`, only
       * out-edges of vertices that received a message in the previous round will run. The default is
       * `EdgeDirection.Either`, which will run `sendMsg` on edges where either side received a message
       * in the previous round. If this is `EdgeDirection.Both`, `sendMsg` will only run on edges where
       * *both* vertices received a message.
       *
       * @param vprog the user-defined vertex program which runs on each
       * vertex and receives the inbound message and computes a new vertex
       * value.  On the first iteration the vertex program is invoked on
       * all vertices and is passed the default message.  On subsequent
       * iterations the vertex program is only invoked on those vertices
       * that receive messages.
       *
       * @param sendMsg a user supplied function that is applied to out
       * edges of vertices that received messages in the current
       * iteration
       *
       * @param mergeMsg a user supplied function that takes two incoming
       * messages of type A and merges them into a single message of type
       * A.  ''This function must be commutative and associative and
       * ideally the size of A should not increase.''
       *
       * @return the resulting graph at the end of the computation
       *
       */
      def apply[VD: ClassTag, ED: ClassTag, A: ClassTag]
         (graph: Graph[VD, ED],
          initialMsg: A,
          maxIterations: Int = Int.MaxValue,
          activeDirection: EdgeDirection = EdgeDirection.Either)
         (vprog: (VertexId, VD, A) => VD,
          sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
          mergeMsg: (A, A) => A)
        : Graph[VD, ED] =
      {
        // Initial step: every vertex runs the vertex program on `initialMsg`.
        var g = graph.mapVertices((vid, vdata) => vprog(vid, vdata, initialMsg)).cache()
        // Compute the first round of messages. They are cached because they are consumed twice:
        // by the count() just below and by the innerJoin of the first loop iteration. Without the
        // cache the initial mapReduceTriplets would be evaluated a second time.
        var messages = g.mapReduceTriplets(sendMsg, mergeMsg).cache()
        var activeMessages = messages.count()
        // Loop until no messages remain or the iteration budget is exhausted.
        var i = 0
        while (activeMessages > 0 && i < maxIterations) {
          // Receive the messages. Vertices that didn't get any messages do not appear in newVerts.
          val newVerts = g.vertices.innerJoin(messages)(vprog).cache()
          // Remember the graph of the previous round so it can be uncached once superseded.
          val prevG = g
          // Update the graph with the new vertices.
          g = g.outerJoinVertices(newVerts) { (vid, old, newOpt) => newOpt.getOrElse(old) }
          g.cache()

          val oldMessages = messages
          // Send new messages. Vertices that didn't get any messages don't appear in newVerts, so don't
          // get to send messages. We must cache messages so it can be materialized on the next line,
          // allowing us to uncache the previous iteration.
          messages =
            g.mapReduceTriplets(sendMsg, mergeMsg, Some((newVerts, activeDirection))).cache()
          // The call to count() materializes `messages`, `newVerts`, and the vertices of `g`. This
          // hides oldMessages (depended on by newVerts), newVerts (depended on by messages), and the
          // vertices of prevG (depended on by newVerts, oldMessages, and the vertices of g).
          activeMessages = messages.count()

          logInfo(s"Pregel finished iteration $i")

          // Unpersist the RDDs hidden by newly-materialized RDDs
          oldMessages.unpersist(blocking = false)
          newVerts.unpersist(blocking = false)
          prevG.unpersistVertices(blocking = false)
          prevG.edges.unpersist(blocking = false)
          // count the iteration
          i += 1
        }

        // The most recent `messages` RDD is still cached at this point (either the initial one, if
        // the loop never ran, or the one from the last iteration). The returned graph has already
        // been materialized by the count() that produced it, so release the messages here instead
        // of leaking one cached RDD per Pregel invocation.
        messages.unpersist(blocking = false)

        g
      } // end of apply

    } // end of object Pregel
  • 相关阅读:
    2018-8-18 训练神经网络笔记
    ffmpeg解码视频为图片和将图片合成一个MP4视频
    minikube start error
    按顺序将目录下的所有文件的绝对路径写入文件中
    ssh远程免密登录
    Ubuntu默认的awk一直报语法错误
    ffmpeg常用操作
    ssh免密登录server
    cv::namedWindow是非线程安全的
    lingcrypt源码安装undefined reference to ...
  • 原文地址:https://www.cnblogs.com/likai198981/p/4168785.html
Copyright © 2020-2023  润新知