aboutsummaryrefslogtreecommitdiff
path: root/_bibliography/big-data.bib
blob: 3d96b413be4cabcd3d32063c21e73a01dcae2575 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
% Spark SQL (SIGMOD 2015). Braces keep the acronym and the system name
% capitalised under sentence-casing bibliography styles.
@inproceedings{armbrust2015spark,
  author       = {Armbrust, Michael and Xin, Reynold S and Lian, Cheng and Huai, Yin and Liu, Davies and Bradley, Joseph K and Meng, Xiangrui and Kaftan, Tomer and Franklin, Michael J and Ghodsi, Ali and others},
  title        = {Spark {SQL}: Relational Data Processing in {Spark}},
  booktitle    = {Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data},
  pages        = {1383--1394},
  year         = {2015},
  organization = {ACM}
}

% HaLoop (PVLDB 3). The system name is braced so styles that downcase titles
% do not render it as "Haloop"; the combined issue is written as a range.
@article{bu2010haloop,
  author    = {Bu, Yingyi and Howe, Bill and Balazinska, Magdalena and Ernst, Michael D},
  title     = {{HaLoop}: Efficient Iterative Data Processing on Large Clusters},
  journal   = {Proceedings of the VLDB Endowment},
  volume    = {3},
  number    = {1--2},
  pages     = {285--296},
  year      = {2010},
  publisher = {VLDB Endowment}
}

% FlumeJava. The venue recorded here is a journal issue (volume + number), so
% the entry is typed as an article; classic styles warn when an inproceedings
% entry carries both volume and number.
% NOTE(review): presumably the PLDI 2010 proceedings issue -- confirm if the
% conference name should be cited instead.
@article{chambers2010flumejava,
  author    = {Chambers, Craig and Raniwala, Ashish and Perry, Frances and Adams, Stephen and Henry, Robert R and Bradshaw, Robert and Weizenbaum, Nathan},
  title     = {{FlumeJava}: Easy, Efficient Data-Parallel Pipelines},
  journal   = {ACM SIGPLAN Notices},
  volume    = {45},
  number    = {6},
  pages     = {363--375},
  year      = {2010},
  publisher = {ACM}
}


% Giraph at Facebook (PVLDB 8). The company name is braced to survive
% sentence-casing styles.
@article{ching2015one,
  author    = {Ching, Avery and Edunov, Sergey and Kabiljo, Maja and Logothetis, Dionysios and Muthukrishnan, Sambavi},
  title     = {One Trillion Edges: Graph Processing at {Facebook}-Scale},
  journal   = {Proceedings of the VLDB Endowment},
  volume    = {8},
  number    = {12},
  pages     = {1804--1815},
  year      = {2015},
  publisher = {VLDB Endowment}
}

% MapReduce (CACM reprint). The CamelCase name is braced; otherwise styles
% that downcase titles print "Mapreduce".
@article{dean2008mapreduce,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal   = {Communications of the ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  publisher = {ACM}
}


% Twister (HPDC 2010). Restores and protects the capitalisation of MapReduce.
@inproceedings{ekanayake2010twister,
  author       = {Ekanayake, Jaliya and Li, Hui and Zhang, Bingjing and Gunarathne, Thilina and Bae, Seung-Hee and Qiu, Judy and Fox, Geoffrey},
  title        = {Twister: A Runtime for Iterative {MapReduce}},
  booktitle    = {Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing},
  pages        = {810--818},
  year         = {2010},
  organization = {ACM}
}

% The Google File System. The recorded venue is a journal issue
% (volume + number), so the entry is typed as an article; the proper noun in
% the title is braced.
@article{ghemawat2003google,
  author    = {Ghemawat, Sanjay and Gobioff, Howard and Leung, Shun-Tak},
  title     = {The {Google} File System},
  journal   = {ACM SIGOPS Operating Systems Review},
  volume    = {37},
  number    = {5},
  pages     = {29--43},
  year      = {2003},
  publisher = {ACM}
}

% Mesos (NSDI 2011). Trailing period removed from the title (styles add their
% own punctuation) and the degenerate range 22--22 collapsed to a single page.
% NOTE(review): volume and pages look like export artifacts -- verify against
% the published proceedings.
@inproceedings{hindman2011mesos,
  author    = {Hindman, Benjamin and Konwinski, Andy and Zaharia, Matei and Ghodsi, Ali and Joseph, Anthony D and Katz, Randy H and Shenker, Scott and Stoica, Ion},
  title     = {Mesos: A Platform for Fine-Grained Resource Sharing in the Data Center},
  booktitle = {NSDI},
  volume    = {11},
  pages     = {22},
  year      = {2011}
}

% Dryad. The recorded venue is a journal issue (volume + number), so the
% entry is typed as an article rather than inproceedings.
@article{isard2007dryad,
  author    = {Isard, Michael and Budiu, Mihai and Yu, Yuan and Birrell, Andrew and Fetterly, Dennis},
  title     = {Dryad: Distributed Data-Parallel Programs from Sequential Building Blocks},
  journal   = {ACM SIGOPS Operating Systems Review},
  volume    = {41},
  number    = {3},
  pages     = {59--72},
  year      = {2007},
  publisher = {ACM}
}


% Pregel (SIGMOD 2010). Proceedings title capitalised the same way as the
% other SIGMOD entries in this file.
@inproceedings{malewicz2010pregel,
  author       = {Malewicz, Grzegorz and Austern, Matthew H and Bik, Aart JC and Dehnert, James C and Horn, Ilan and Leiser, Naty and Czajkowski, Grzegorz},
  title        = {Pregel: A System for Large-Scale Graph Processing},
  booktitle    = {Proceedings of the 2010 ACM SIGMOD International Conference on Management of Data},
  pages        = {135--146},
  year         = {2010},
  organization = {ACM}
}


% Pig Latin (SIGMOD 2008). The language name is braced as a unit and the
% proceedings title is capitalised like the other SIGMOD entries.
@inproceedings{olston2008pig,
  author       = {Olston, Christopher and Reed, Benjamin and Srivastava, Utkarsh and Kumar, Ravi and Tomkins, Andrew},
  title        = {{Pig Latin}: A Not-So-Foreign Language for Data Processing},
  booktitle    = {Proceedings of the 2008 ACM SIGMOD International Conference on Management of Data},
  pages        = {1099--1110},
  year         = {2008},
  organization = {ACM}
}

% Sawzall. The language name is braced so sentence-casing styles keep its
% capital letter.
@article{pike2005interpreting,
  author    = {Pike, Rob and Dorward, Sean and Griesemer, Robert and Quinlan, Sean},
  title     = {Interpreting the Data: Parallel Analysis with {Sawzall}},
  journal   = {Scientific Programming},
  volume    = {13},
  number    = {4},
  pages     = {277--298},
  year      = {2005},
  publisher = {Hindawi Publishing Corporation}
}

% HDFS (MSST 2010). Restores and protects the capital in Hadoop and
% title-cases the proceedings name.
@inproceedings{shvachko2010hadoop,
  author       = {Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
  title        = {The {Hadoop} Distributed File System},
  booktitle    = {2010 IEEE 26th Symposium on Mass Storage Systems and Technologies (MSST)},
  pages        = {1--10},
  year         = {2010},
  organization = {IEEE}
}

% Online lecture notes on the bulk synchronous model.
% NOTE(review): the key does not follow the authorYEARword scheme used by the
% other entries; it is left unchanged because documents may already cite it.
@online{WinNT,
  title   = {Bulk synchronous model},
  author  = {Tarau, Paul},
  url     = {http://www.cse.unt.edu/~tarau/teaching/parpro/papers/Bulk%20synchronous%20parallel.pdf},
  urldate = {2016-11-24},
  year    = 2014
}

% Hive demo paper (PVLDB 2).
@article{thusoo2009hive,
  author    = {Thusoo, Ashish and Sarma, Joydeep Sen and Jain, Namit and Shao, Zheng and Chakka, Prasad and Anthony, Suresh and Liu, Hao and Wyckoff, Pete and Murthy, Raghotham},
  title     = {Hive: a warehousing solution over a map-reduce framework},
  year      = {2009},
  journal   = {Proceedings of the VLDB Endowment},
  publisher = {VLDB Endowment},
  volume    = {2},
  number    = {2},
  pages     = {1626--1629},
}

% Hive (ICDE 2010). The fused "Hive-a" is written as a spaced dash and the
% capital in Hadoop is restored and protected.
@inproceedings{thusoo2010hive,
  author       = {Thusoo, Ashish and Sarma, Joydeep Sen and Jain, Namit and Shao, Zheng and Chakka, Prasad and Zhang, Ning and Antony, Suresh and Liu, Hao and Murthy, Raghotham},
  title        = {Hive -- A Petabyte Scale Data Warehouse Using {Hadoop}},
  booktitle    = {2010 IEEE 26th International Conference on Data Engineering (ICDE 2010)},
  pages        = {996--1005},
  year         = {2010},
  organization = {IEEE}
}
% YARN (SoCC 2013). Project name and acronym are braced; the pages field
% holds a single number, as given in the original entry.
@inproceedings{vavilapalli2013apache,
  author       = {Vavilapalli, Vinod Kumar and Murthy, Arun C and Douglas, Chris and Agarwal, Sharad and Konar, Mahadev and Evans, Robert and Graves, Thomas and Lowe, Jason and Shah, Hitesh and Seth, Siddharth and others},
  title        = {Apache {Hadoop} {YARN}: Yet Another Resource Negotiator},
  booktitle    = {Proceedings of the 4th Annual Symposium on Cloud Computing},
  pages        = {5},
  year         = {2013},
  organization = {ACM}
}
% GraphX (GRADES 2013). Restores the CamelCase system name and protects both
% system names from sentence-casing styles.
@inproceedings{xin2013graphx,
  author       = {Xin, Reynold S and Gonzalez, Joseph E and Franklin, Michael J and Stoica, Ion},
  title        = {{GraphX}: A Resilient Distributed Graph System on {Spark}},
  booktitle    = {First International Workshop on Graph Data Management Experiences and Systems},
  pages        = {2},
  year         = {2013},
  organization = {ACM}
}

% DryadLINQ (OSDI 2008). Trailing period removed from the title (styles add
% their own punctuation) and the mixed-case system name is braced.
% NOTE(review): the volume value looks like an export artifact -- verify.
@inproceedings{yu2008dryadlinq,
  author    = {Yu, Yuan and Isard, Michael and Fetterly, Dennis and Budiu, Mihai and Erlingsson, {\'U}lfar and Gunda, Pradeep Kumar and Currey, Jon},
  title     = {{DryadLINQ}: A System for General-Purpose Distributed Data-Parallel Computing Using a High-Level Language},
  booktitle = {OSDI},
  volume    = {8},
  pages     = {1--14},
  year      = {2008}
}

% Spark (HotCloud 2010). HotCloud is a workshop, so the venue belongs in
% booktitle of an inproceedings entry rather than in journal. Trailing period
% removed from the title; the degenerate range 10--10 collapsed to one page.
% NOTE(review): the volume value looks like an export artifact -- verify.
@inproceedings{zaharia2010spark,
  author    = {Zaharia, Matei and Chowdhury, Mosharaf and Franklin, Michael J and Shenker, Scott and Stoica, Ion},
  title     = {Spark: Cluster Computing with Working Sets},
  booktitle = {HotCloud},
  volume    = {10},
  pages     = {10},
  year      = {2010}
}


% iMapReduce. Restores the mixed-case system name and braces it so styles do
% not recase it.
@article{zhang2012imapreduce,
  author    = {Zhang, Yanfeng and Gao, Qixin and Gao, Lixin and Wang, Cuirong},
  title     = {{iMapReduce}: A Distributed Computing Framework for Iterative Computation},
  journal   = {Journal of Grid Computing},
  volume    = {10},
  number    = {1},
  pages     = {47--68},
  year      = {2012},
  publisher = {Springer}
}

% Valiant's bridging-model paper (CACM 33).
@article{valiant1990bridging,
  author    = {Valiant, Leslie G},
  title     = {A bridging model for parallel computation},
  year      = {1990},
  journal   = {Communications of the ACM},
  publisher = {ACM},
  volume    = {33},
  number    = {8},
  pages     = {103--111},
}

% Theta-joins on MapReduce (SIGMOD 2011). The system name is braced and the
% proceedings title is capitalised like the other SIGMOD entries.
@inproceedings{okcan2011processing,
  author       = {Okcan, Alper and Riedewald, Mirek},
  title        = {Processing Theta-Joins Using {MapReduce}},
  booktitle    = {Proceedings of the 2011 ACM SIGMOD International Conference on Management of Data},
  pages        = {949--960},
  year         = {2011},
  organization = {ACM}
}

% Scaling Spark (PVLDB 8). The system name is capitalised and braced.
@article{armbrust2015scaling,
  author    = {Armbrust, Michael and Das, Tathagata and Davidson, Aaron and Ghodsi, Ali and Or, Andrew and Rosen, Josh and Stoica, Ion and Wendell, Patrick and Xin, Reynold and Zaharia, Matei},
  title     = {Scaling {Spark} in the Real World: Performance and Usability},
  journal   = {Proceedings of the VLDB Endowment},
  volume    = {8},
  number    = {12},
  pages     = {1840--1843},
  year      = {2015},
  publisher = {VLDB Endowment}
}

% Second copy of the entry keyed malewicz2010pregel removed here: the same key
% is already defined earlier in this file, and a repeated key is a BibTeX error.

% PowerGraph (OSDI 2012). Drops the "Presented as part of the" export prefix
% from the venue and restores the CamelCase system name.
@inproceedings{gonzalez2012powergraph,
  author    = {Gonzalez, Joseph E and Low, Yucheng and Gu, Haijie and Bickson, Danny and Guestrin, Carlos},
  title     = {{PowerGraph}: Distributed Graph-Parallel Computation on Natural Graphs},
  booktitle = {10th USENIX Symposium on Operating Systems Design and Implementation (OSDI 12)},
  pages     = {17--30},
  year      = {2012}
}

% Kafka (NetDB 2011).
@inproceedings{kreps2011kafka,
  author    = {Kreps, Jay and Narkhede, Neha and Rao, Jun and others},
  title     = {Kafka: A distributed messaging system for log processing},
  year      = {2011},
  booktitle = {Proceedings of the NetDB},
  pages     = {1--7},
}

% Discretized Streams. The original booktitle was cut off after
% "Presented as part of the", leaving the venue unnamed.
% NOTE(review): venue filled in as the 2012 HotCloud workshop -- confirm
% against the published paper.
@inproceedings{zaharia2012discretized,
  author    = {Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Shenker, Scott and Stoica, Ion},
  title     = {Discretized Streams: An Efficient and Fault-Tolerant Model for Stream Processing on Large Clusters},
  booktitle = {4th USENIX Workshop on Hot Topics in Cloud Computing (HotCloud 12)},
  year      = {2012}
}