From c019fd9d7f49168f0bc855de717d710946c032e1 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 12 Dec 2016 21:49:01 -0500 Subject: .. --- _bibliography/dist-langs.bib | 8 ++++++++ chapter/4/dist-langs.md | 11 ++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/_bibliography/dist-langs.bib b/_bibliography/dist-langs.bib index b690e77..cd605f0 100644 --- a/_bibliography/dist-langs.bib +++ b/_bibliography/dist-langs.bib @@ -92,3 +92,11 @@ year={2008}, publisher={ACM} } + +@inproceedings{gonzalez2012powergraph, + title={Powergraph: Distributed graph-parallel computation on natural graphs}, + author={Gonzalez, Joseph E and Low, Yucheng and Gu, Haijie and Bickson, Danny and Guestrin, Carlos}, + booktitle={Presented as part of the 10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12)}, + pages={17--30}, + year={2012} +} diff --git a/chapter/4/dist-langs.md b/chapter/4/dist-langs.md index e489d03..32d1175 100644 --- a/chapter/4/dist-langs.md +++ b/chapter/4/dist-langs.md @@ -311,9 +311,18 @@ In the reduce phase, the list of 1's is summed to compute a wordcount for each w ![Alt text] (./MR.png "MapReduce Workflow") (http://www.milanor.net/blog/an-example-of-mapreduce-with-rmr2/) -#### DryadLINQ () #### Discretized Streams (2012) +#### GraphX (2013) + +Many real-world problems are expressed using graphs. +GraphX is a system built on top of the Spark MapReduce framework { // TODO cite RDD } that exposes traditional graph operations while internally representing a graph as a collection of RDDs. +GraphX exposes these operations through what it calls a Resilient Distributed Graph (RDG). +Internally, an RDG is a collection of RDDs that define a vertex split of a graph { // TODO CITE powergraph }. +Because they are built on top of RDDs, RDGs inherit immutability. +When a transformation is performed, a new graph is created. 
+In this way, fault tolerance in GraphX works the same way as it does in vanilla Spark: when a fault occurs, the recorded series of computations is re-executed. + ### Which is best? Why? MR vs Actors: depends on problem, solution -- cgit v1.2.3