% Bibliography: large-scale data processing systems (Spark, MapReduce, Hadoop and related work).
%
% Conventions used in this file:
%   * One field per line; order is author, title, venue, volume/number, pages, year, publisher.
%   * Citation keys are part of the documents' interface and must not be renamed.
%   * Proper nouns and acronyms in titles are brace-protected so sentence-casing styles keep them.
%   * Titles carry no trailing period; the bibliography style supplies punctuation.
%   * Page ranges use a double hyphen; a single number is a single page or an article number.
%   * Recurring journal names are defined once as string macros below.

@string{pvldb = {Proceedings of the VLDB Endowment}}
@string{cacm  = {Communications of the ACM}}

@article{armbrust2015scaling,
  author    = {Armbrust, Michael and Das, Tathagata and Davidson, Aaron and Ghodsi, Ali and Or, Andrew and Rosen, Josh and Stoica, Ion and Wendell, Patrick and Xin, Reynold and Zaharia, Matei},
  title     = {Scaling {Spark} in the real world: performance and usability},
  journal   = pvldb,
  volume    = {8},
  number    = {12},
  pages     = {1840--1843},
  year      = {2015},
  publisher = {VLDB Endowment}
}

@inproceedings{armbrust2015spark,
  author       = {Armbrust, Michael and Xin, Reynold S. and Lian, Cheng and Huai, Yin and Liu, Davies and Bradley, Joseph K. and Meng, Xiangrui and Kaftan, Tomer and Franklin, Michael J. and Ghodsi, Ali and others},
  title        = {{Spark} {SQL}: Relational data processing in {Spark}},
  booktitle    = {Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data},
  pages        = {1383--1394},
  year         = {2015},
  organization = {ACM}
}

@article{bu2010haloop,
  author    = {Bu, Yingyi and Howe, Bill and Balazinska, Magdalena and Ernst, Michael D.},
  title     = {{HaLoop}: efficient iterative data processing on large clusters},
  journal   = pvldb,
  volume    = {3},
  number    = {1--2},
  pages     = {285--296},
  year      = {2010},
  publisher = {VLDB Endowment}
}

% The next entry cites the periodical issue (volume and number), so it uses the article type;
% standard styles reject volume together with number on a proceedings paper.
@article{chambers2010flumejava,
  author    = {Chambers, Craig and Raniwala, Ashish and Perry, Frances and Adams, Stephen and Henry, Robert R. and Bradshaw, Robert and Weizenbaum, Nathan},
  title     = {{FlumeJava}: easy, efficient data-parallel pipelines},
  journal   = {ACM SIGPLAN Notices},
  volume    = {45},
  number    = {6},
  pages     = {363--375},
  year      = {2010},
  publisher = {ACM}
}

@article{ching2015one,
  author    = {Ching, Avery and Edunov, Sergey and Kabiljo, Maja and Logothetis, Dionysios and Muthukrishnan, Sambavi},
  title     = {One trillion edges: graph processing at {Facebook}-scale},
  journal   = pvldb,
  volume    = {8},
  number    = {12},
  pages     = {1804--1815},
  year      = {2015},
  publisher = {VLDB Endowment}
}

@article{dean2008mapreduce,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: simplified data processing on large clusters},
  journal   = cacm,
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  publisher = {ACM}
}

@inproceedings{ekanayake2010twister,
  author       = {Ekanayake, Jaliya and Li, Hui and Zhang, Bingjing and Gunarathne, Thilina and Bae, Seung-Hee and Qiu, Judy and Fox, Geoffrey},
  title        = {{Twister}: a runtime for iterative {MapReduce}},
  booktitle    = {Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing},
  pages        = {810--818},
  year         = {2010},
  organization = {ACM}
}

% Cited via the periodical issue (volume and number), hence the article type.
@article{ghemawat2003google,
  author    = {Ghemawat, Sanjay and Gobioff, Howard and Leung, Shun-Tak},
  title     = {The {Google} file system},
  journal   = {ACM SIGOPS Operating Systems Review},
  volume    = {37},
  number    = {5},
  pages     = {29--43},
  year      = {2003},
  publisher = {ACM}
}

@inproceedings{gonzalez2012powergraph,
  author    = {Gonzalez, Joseph E. and Low, Yucheng and Gu, Haijie and Bickson, Danny and Guestrin, Carlos},
  title     = {{PowerGraph}: Distributed graph-parallel computation on natural graphs},
  booktitle = {10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12)},
  pages     = {17--30},
  year      = {2012}
}

% NOTE(review): the export carried a volume equal to the two-digit year and a degenerate
% page range; the volume is dropped and the page value kept as exported. Confirm the
% page numbers against the published proceedings.
@inproceedings{hindman2011mesos,
  author    = {Hindman, Benjamin and Konwinski, Andy and Zaharia, Matei and Ghodsi, Ali and Joseph, Anthony D. and Katz, Randy H. and Shenker, Scott and Stoica, Ion},
  title     = {{Mesos}: A Platform for Fine-Grained Resource Sharing in the Data Center},
  booktitle = {8th USENIX Symposium on Networked Systems Design and Implementation (NSDI 11)},
  pages     = {22},
  year      = {2011}
}

% NOTE(review): the exported volume was not a real proceedings volume and is dropped;
% the page value is kept as exported and should be confirmed.
@inproceedings{hunt2010zookeeper,
  author    = {Hunt, Patrick and Konar, Mahadev and Junqueira, Flavio Paiva and Reed, Benjamin},
  title     = {{ZooKeeper}: Wait-free Coordination for {Internet}-scale Systems},
  booktitle = {2010 USENIX Annual Technical Conference (USENIX ATC 10)},
  pages     = {9},
  year      = {2010}
}

% Cited via the periodical issue (volume and number), hence the article type.
@article{isard2007dryad,
  author    = {Isard, Michael and Budiu, Mihai and Yu, Yuan and Birrell, Andrew and Fetterly, Dennis},
  title     = {{Dryad}: distributed data-parallel programs from sequential building blocks},
  journal   = {ACM SIGOPS Operating Systems Review},
  volume    = {41},
  number    = {3},
  pages     = {59--72},
  year      = {2007},
  publisher = {ACM}
}

% NOTE(review): the export appended "and others", but the paper lists three authors;
% the spurious marker is removed. Confirm against the paper if in doubt.
@inproceedings{kreps2011kafka,
  author    = {Kreps, Jay and Narkhede, Neha and Rao, Jun},
  title     = {{Kafka}: A distributed messaging system for log processing},
  booktitle = {Proceedings of the NetDB},
  pages     = {1--7},
  year      = {2011}
}

@inproceedings{li2014tachyon,
  author       = {Li, Haoyuan and Ghodsi, Ali and Zaharia, Matei and Shenker, Scott and Stoica, Ion},
  title        = {{Tachyon}: Reliable, memory speed storage for cluster computing frameworks},
  booktitle    = {Proceedings of the ACM Symposium on Cloud Computing},
  pages        = {1--15},
  year         = {2014},
  organization = {ACM}
}

@inproceedings{malewicz2010pregel,
  author       = {Malewicz, Grzegorz and Austern, Matthew H. and Bik, Aart J. C. and Dehnert, James C. and Horn, Ilan and Leiser, Naty and Czajkowski, Grzegorz},
  title        = {{Pregel}: a system for large-scale graph processing},
  booktitle    = {Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data},
  pages        = {135--146},
  year         = {2010},
  organization = {ACM}
}

@inproceedings{okcan2011processing,
  author       = {Okcan, Alper and Riedewald, Mirek},
  title        = {Processing theta-joins using {MapReduce}},
  booktitle    = {Proceedings of the 2011 ACM SIGMOD International Conference on Management of Data},
  pages        = {949--960},
  year         = {2011},
  organization = {ACM}
}

@inproceedings{olston2008pig,
  author       = {Olston, Christopher and Reed, Benjamin and Srivastava, Utkarsh and Kumar, Ravi and Tomkins, Andrew},
  title        = {{Pig} {Latin}: a not-so-foreign language for data processing},
  booktitle    = {Proceedings of the 2008 ACM SIGMOD International Conference on Management of Data},
  pages        = {1099--1110},
  year         = {2008},
  organization = {ACM}
}

@article{pike2005interpreting,
  author    = {Pike, Rob and Dorward, Sean and Griesemer, Robert and Quinlan, Sean},
  title     = {Interpreting the data: Parallel analysis with {Sawzall}},
  journal   = {Scientific Programming},
  volume    = {13},
  number    = {4},
  pages     = {277--298},
  year      = {2005},
  publisher = {Hindawi Publishing Corporation}
}

@inproceedings{shvachko2010hadoop,
  author       = {Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
  title        = {The {Hadoop} distributed file system},
  booktitle    = {2010 IEEE 26th Symposium on Mass Storage Systems and Technologies (MSST)},
  pages        = {1--10},
  year         = {2010},
  organization = {IEEE}
}

% NOTE(review): the key below does not follow the author-year-keyword scheme used elsewhere
% in this file; it is kept because documents may already cite it. The online entry type and
% the urldate field are biblatex features; classic styles fall back to a generic type.
@online{WinNT,
  author  = {Tarau, Paul},
  title   = {Bulk synchronous model},
  year    = {2014},
  url     = {http://www.cse.unt.edu/~tarau/teaching/parpro/papers/Bulk%20synchronous%20parallel.pdf},
  urldate = {2016-11-24}
}

@article{thusoo2009hive,
  author    = {Thusoo, Ashish and Sarma, Joydeep Sen and Jain, Namit and Shao, Zheng and Chakka, Prasad and Anthony, Suresh and Liu, Hao and Wyckoff, Pete and Murthy, Raghotham},
  title     = {{Hive}: a warehousing solution over a map-reduce framework},
  journal   = pvldb,
  volume    = {2},
  number    = {2},
  pages     = {1626--1629},
  year      = {2009},
  publisher = {VLDB Endowment}
}

@inproceedings{thusoo2010hive,
  author       = {Thusoo, Ashish and Sarma, Joydeep Sen and Jain, Namit and Shao, Zheng and Chakka, Prasad and Zhang, Ning and Antony, Suresh and Liu, Hao and Murthy, Raghotham},
  title        = {{Hive} -- a petabyte scale data warehouse using {Hadoop}},
  booktitle    = {2010 IEEE 26th International Conference on Data Engineering (ICDE 2010)},
  pages        = {996--1005},
  year         = {2010},
  organization = {IEEE}
}

@article{valiant1990bridging,
  author    = {Valiant, Leslie G.},
  title     = {A bridging model for parallel computation},
  journal   = cacm,
  volume    = {33},
  number    = {8},
  pages     = {103--111},
  year      = {1990},
  publisher = {ACM}
}

@inproceedings{vavilapalli2013apache,
  author       = {Vavilapalli, Vinod Kumar and Murthy, Arun C. and Douglas, Chris and Agarwal, Sharad and Konar, Mahadev and Evans, Robert and Graves, Thomas and Lowe, Jason and Shah, Hitesh and Seth, Siddharth and others},
  title        = {{Apache} {Hadoop} {YARN}: Yet another resource negotiator},
  booktitle    = {Proceedings of the 4th Annual Symposium on Cloud Computing},
  pages        = {5},
  year         = {2013},
  organization = {ACM}
}

@inproceedings{xin2013graphx,
  author       = {Xin, Reynold S. and Gonzalez, Joseph E. and Franklin, Michael J. and Stoica, Ion},
  title        = {{GraphX}: A resilient distributed graph system on {Spark}},
  booktitle    = {First International Workshop on Graph Data Management Experiences and Systems},
  pages        = {2},
  year         = {2013},
  organization = {ACM}
}

% The exported volume equalled the two-digit year and is dropped.
@inproceedings{yu2008dryadlinq,
  author    = {Yu, Yuan and Isard, Michael and Fetterly, Dennis and Budiu, Mihai and Erlingsson, {\'U}lfar and Gunda, Pradeep Kumar and Currey, Jon},
  title     = {{DryadLINQ}: A System for General-Purpose Distributed Data-Parallel Computing Using a High-Level Language},
  booktitle = {8th USENIX Symposium on Operating Systems Design and Implementation (OSDI 08)},
  pages     = {1--14},
  year      = {2008}
}

% Workshop paper: the venue belongs in booktitle, not journal. The exported volume equalled
% the two-digit year and is dropped.
% NOTE(review): the page value is kept as exported; confirm against the proceedings.
@inproceedings{zaharia2010spark,
  author    = {Zaharia, Matei and Chowdhury, Mosharaf and Franklin, Michael J. and Shenker, Scott and Stoica, Ion},
  title     = {{Spark}: cluster computing with working sets},
  booktitle = {2nd USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 10)},
  pages     = {10},
  year      = {2010}
}

% The exported booktitle was truncated to "Presented as part of the"; the venue is restored.
@inproceedings{zaharia2012discretized,
  author    = {Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Shenker, Scott and Stoica, Ion},
  title     = {Discretized streams: an efficient and fault-tolerant model for stream processing on large clusters},
  booktitle = {4th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 12)},
  year      = {2012}
}

@article{zhang2012imapreduce,
  author    = {Zhang, Yanfeng and Gao, Qixin and Gao, Lixin and Wang, Cuirong},
  title     = {{iMapReduce}: A distributed computing framework for iterative computation},
  journal   = {Journal of Grid Computing},
  volume    = {10},
  number    = {1},
  pages     = {47--68},
  year      = {2012},
  publisher = {Springer}
}