aboutsummaryrefslogtreecommitdiff
path: root/_bibliography/big-data.bib
blob: 297073aaad92b4e3821641bed69957a5f7e89941 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
@article{armbrust2015scaling,
  author    = {Armbrust, Michael and Das, Tathagata and Davidson, Aaron and Ghodsi, Ali and Or, Andrew and Rosen, Josh and Stoica, Ion and Wendell, Patrick and Xin, Reynold and Zaharia, Matei},
  title     = {Scaling {Spark} in the Real World: Performance and Usability},
  journal   = {Proceedings of the {VLDB} Endowment},
  volume    = {8},
  number    = {12},
  pages     = {1840--1843},
  year      = {2015},
  publisher = {VLDB Endowment}
}


@inproceedings{armbrust2015spark,
  author    = {Armbrust, Michael and Xin, Reynold S and Lian, Cheng and Huai, Yin and Liu, Davies and Bradley, Joseph K and Meng, Xiangrui and Kaftan, Tomer and Franklin, Michael J and Ghodsi, Ali and Zaharia, Matei},
  title     = {{Spark} {SQL}: Relational Data Processing in {Spark}},
  booktitle = {Proceedings of the 2015 {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {1383--1394},
  year      = {2015},
  publisher = {ACM}
}

@article{bu2010haloop,
  author    = {Bu, Yingyi and Howe, Bill and Balazinska, Magdalena and Ernst, Michael D},
  title     = {{HaLoop}: Efficient Iterative Data Processing on Large Clusters},
  journal   = {Proceedings of the {VLDB} Endowment},
  volume    = {3},
  number    = {1--2},
  pages     = {285--296},
  year      = {2010},
  publisher = {VLDB Endowment}
}

% Typed as an article because the record carries journal data (journal name,
% volume, issue number); classic BibTeX styles warn when an inproceedings
% entry sets both volume and number.
@article{chambers2010flumejava,
  author    = {Chambers, Craig and Raniwala, Ashish and Perry, Frances and Adams, Stephen and Henry, Robert R and Bradshaw, Robert and Weizenbaum, Nathan},
  title     = {{FlumeJava}: Easy, Efficient Data-Parallel Pipelines},
  journal   = {{ACM} {SIGPLAN} Notices},
  volume    = {45},
  number    = {6},
  pages     = {363--375},
  year      = {2010},
  publisher = {ACM}
}


@article{ching2015one,
  author    = {Ching, Avery and Edunov, Sergey and Kabiljo, Maja and Logothetis, Dionysios and Muthukrishnan, Sambavi},
  title     = {One Trillion Edges: Graph Processing at {Facebook}-Scale},
  journal   = {Proceedings of the {VLDB} Endowment},
  volume    = {8},
  number    = {12},
  pages     = {1804--1815},
  year      = {2015},
  publisher = {VLDB Endowment}
}

@article{dean2008mapreduce,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal   = {Communications of the {ACM}},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  publisher = {ACM}
}


@inproceedings{ekanayake2010twister,
  author    = {Ekanayake, Jaliya and Li, Hui and Zhang, Bingjing and Gunarathne, Thilina and Bae, Seung-Hee and Qiu, Judy and Fox, Geoffrey},
  title     = {{Twister}: A Runtime for Iterative {MapReduce}},
  booktitle = {Proceedings of the 19th {ACM} International Symposium on High Performance Distributed Computing},
  pages     = {810--818},
  year      = {2010},
  publisher = {ACM}
}


% Typed as an article because the record carries journal data (journal name,
% volume, issue number); classic BibTeX styles warn when an inproceedings
% entry sets both volume and number.
@article{ghemawat2003google,
  author    = {Ghemawat, Sanjay and Gobioff, Howard and Leung, Shun-Tak},
  title     = {The {Google} File System},
  journal   = {{ACM} {SIGOPS} Operating Systems Review},
  volume    = {37},
  number    = {5},
  pages     = {29--43},
  year      = {2003},
  publisher = {ACM}
}

@inproceedings{gonzalez2012powergraph,
  author    = {Gonzalez, Joseph E and Low, Yucheng and Gu, Haijie and Bickson, Danny and Guestrin, Carlos},
  title     = {{PowerGraph}: Distributed Graph-Parallel Computation on Natural Graphs},
  booktitle = {10th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 12)},
  pages     = {17--30},
  year      = {2012}
}

% No volume field: the symposium proceedings are not a numbered volume.
@inproceedings{hindman2011mesos,
  author    = {Hindman, Benjamin and Konwinski, Andy and Zaharia, Matei and Ghodsi, Ali and Joseph, Anthony D and Katz, Randy H and Shenker, Scott and Stoica, Ion},
  title     = {{Mesos}: A Platform for Fine-Grained Resource Sharing in the Data Center},
  booktitle = {Proceedings of the 8th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 11)},
  pages     = {295--308},
  year      = {2011}
}

% NOTE(review): the pages value looks like an article or sequence number from
% an automated export rather than a page range -- confirm against the
% proceedings. No volume field: the conference proceedings are not a
% numbered volume.
@inproceedings{hunt2010zookeeper,
  author    = {Hunt, Patrick and Konar, Mahadev and Junqueira, Flavio Paiva and Reed, Benjamin},
  title     = {{ZooKeeper}: Wait-Free Coordination for Internet-Scale Systems},
  booktitle = {{USENIX} Annual Technical Conference},
  pages     = {9},
  year      = {2010}
}

% Typed as an article because the record carries journal data (journal name,
% volume, issue number); classic BibTeX styles warn when an inproceedings
% entry sets both volume and number.
@article{isard2007dryad,
  author    = {Isard, Michael and Budiu, Mihai and Yu, Yuan and Birrell, Andrew and Fetterly, Dennis},
  title     = {{Dryad}: Distributed Data-Parallel Programs from Sequential Building Blocks},
  journal   = {{ACM} {SIGOPS} Operating Systems Review},
  volume    = {41},
  number    = {3},
  pages     = {59--72},
  year      = {2007},
  publisher = {ACM}
}


@inproceedings{kreps2011kafka,
  author    = {Kreps, Jay and Narkhede, Neha and Rao, Jun},
  title     = {{Kafka}: A Distributed Messaging System for Log Processing},
  booktitle = {Proceedings of the International Workshop on Networking Meets Databases ({NetDB})},
  pages     = {1--7},
  year      = {2011}
}

@inproceedings{li2014tachyon,
  author    = {Li, Haoyuan and Ghodsi, Ali and Zaharia, Matei and Shenker, Scott and Stoica, Ion},
  title     = {{Tachyon}: Reliable, Memory Speed Storage for Cluster Computing Frameworks},
  booktitle = {Proceedings of the {ACM} Symposium on Cloud Computing},
  pages     = {1--15},
  year      = {2014},
  publisher = {ACM}
}


@inproceedings{malewicz2010pregel,
  author    = {Malewicz, Grzegorz and Austern, Matthew H and Bik, Aart JC and Dehnert, James C and Horn, Ilan and Leiser, Naty and Czajkowski, Grzegorz},
  title     = {{Pregel}: A System for Large-Scale Graph Processing},
  booktitle = {Proceedings of the 2010 {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {135--146},
  year      = {2010},
  publisher = {ACM}
}
@inproceedings{okcan2011processing,
  author    = {Okcan, Alper and Riedewald, Mirek},
  title     = {Processing Theta-Joins Using {MapReduce}},
  booktitle = {Proceedings of the 2011 {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {949--960},
  year      = {2011},
  publisher = {ACM}
}

@inproceedings{olston2008pig,
  author    = {Olston, Christopher and Reed, Benjamin and Srivastava, Utkarsh and Kumar, Ravi and Tomkins, Andrew},
  title     = {{Pig} {Latin}: A Not-So-Foreign Language for Data Processing},
  booktitle = {Proceedings of the 2008 {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {1099--1110},
  year      = {2008},
  publisher = {ACM}
}

@article{pike2005interpreting,
  author    = {Pike, Rob and Dorward, Sean and Griesemer, Robert and Quinlan, Sean},
  title     = {Interpreting the Data: Parallel Analysis with {Sawzall}},
  journal   = {Scientific Programming},
  volume    = {13},
  number    = {4},
  pages     = {277--298},
  year      = {2005},
  publisher = {Hindawi Publishing Corporation}
}

@inproceedings{shvachko2010hadoop,
  author    = {Shvachko, Konstantin and Kuang, Hairong and Radia, Sanjay and Chansler, Robert},
  title     = {The {Hadoop} Distributed File System},
  booktitle = {2010 {IEEE} 26th Symposium on Mass Storage Systems and Technologies ({MSST})},
  pages     = {1--10},
  year      = {2010},
  publisher = {IEEE}
}

% NOTE(review): this key does not follow the authorYEARword pattern used by
% the other keys in this file; it is kept verbatim because documents may
% already cite it.
@online{WinNT,
  title   = {Bulk synchronous model},
  author  = {Tarau, Paul},
  url     = {http://www.cse.unt.edu/~tarau/teaching/parpro/papers/Bulk%20synchronous%20parallel.pdf},
  urldate = {2016-11-24},
  year    = {2014}
}

@article{thusoo2009hive,
  author    = {Thusoo, Ashish and {Sen Sarma}, Joydeep and Jain, Namit and Shao, Zheng and Chakka, Prasad and Anthony, Suresh and Liu, Hao and Wyckoff, Pete and Murthy, Raghotham},
  title     = {{Hive}: A Warehousing Solution over a Map-Reduce Framework},
  journal   = {Proceedings of the {VLDB} Endowment},
  volume    = {2},
  number    = {2},
  pages     = {1626--1629},
  year      = {2009},
  publisher = {VLDB Endowment}
}

@inproceedings{thusoo2010hive,
  author    = {Thusoo, Ashish and {Sen Sarma}, Joydeep and Jain, Namit and Shao, Zheng and Chakka, Prasad and Zhang, Ning and Antony, Suresh and Liu, Hao and Murthy, Raghotham},
  title     = {{Hive} -- A Petabyte Scale Data Warehouse Using {Hadoop}},
  booktitle = {2010 {IEEE} 26th International Conference on Data Engineering ({ICDE} 2010)},
  pages     = {996--1005},
  year      = {2010},
  publisher = {IEEE}
}

@article{valiant1990bridging,
  author    = {Valiant, Leslie G},
  title     = {A bridging model for parallel computation},
  journal   = {Communications of the ACM},
  year      = {1990},
  volume    = {33},
  number    = {8},
  pages     = {103--111},
  publisher = {ACM}
}

% NOTE(review): the pages value looks like an article number rather than a
% page range -- confirm against the proceedings.
@inproceedings{vavilapalli2013apache,
  author    = {Vavilapalli, Vinod Kumar and Murthy, Arun C and Douglas, Chris and Agarwal, Sharad and Konar, Mahadev and Evans, Robert and Graves, Thomas and Lowe, Jason and Shah, Hitesh and Seth, Siddharth and others},
  title     = {{Apache} {Hadoop} {YARN}: Yet Another Resource Negotiator},
  booktitle = {Proceedings of the 4th Annual Symposium on Cloud Computing},
  pages     = {5},
  year      = {2013},
  publisher = {ACM}
}

% NOTE(review): the pages value looks like an article number rather than a
% page range -- confirm against the proceedings.
@inproceedings{xin2013graphx,
  author    = {Xin, Reynold S and Gonzalez, Joseph E and Franklin, Michael J and Stoica, Ion},
  title     = {{GraphX}: A Resilient Distributed Graph System on {Spark}},
  booktitle = {First International Workshop on Graph Data Management Experiences and Systems},
  pages     = {2},
  year      = {2013},
  publisher = {ACM}
}

% No volume field: the symposium proceedings are not a numbered volume.
@inproceedings{yu2008dryadlinq,
  author    = {Yu, Yuan and Isard, Michael and Fetterly, Dennis and Budiu, Mihai and Erlingsson, {\'U}lfar and Gunda, Pradeep Kumar and Currey, Jon},
  title     = {{DryadLINQ}: A System for General-Purpose Distributed Data-Parallel Computing Using a High-Level Language},
  booktitle = {8th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 08)},
  pages     = {1--14},
  year      = {2008}
}

% Workshop paper, so it is typed as inproceedings with the venue in booktitle.
% NOTE(review): the pages value looks like an article or sequence number from
% an automated export rather than a page range -- confirm against the
% proceedings.
@inproceedings{zaharia2010spark,
  author    = {Zaharia, Matei and Chowdhury, Mosharaf and Franklin, Michael J and Shenker, Scott and Stoica, Ion},
  title     = {{Spark}: Cluster Computing with Working Sets},
  booktitle = {2nd {USENIX} Workshop on Hot Topics in Cloud Computing ({HotCloud} 10)},
  pages     = {10},
  year      = {2010}
}

@inproceedings{zaharia2012discretized,
  author    = {Zaharia, Matei and Das, Tathagata and Li, Haoyuan and Shenker, Scott and Stoica, Ion},
  title     = {Discretized Streams: An Efficient and Fault-Tolerant Model for Stream Processing on Large Clusters},
  booktitle = {4th {USENIX} Workshop on Hot Topics in Cloud Computing ({HotCloud} 12)},
  year      = {2012}
}

@article{zhang2012imapreduce,
  author    = {Zhang, Yanfeng and Gao, Qixin and Gao, Lixin and Wang, Cuirong},
  title     = {{iMapReduce}: A Distributed Computing Framework for Iterative Computation},
  journal   = {Journal of Grid Computing},
  volume    = {10},
  number    = {1},
  pages     = {47--68},
  year      = {2012},
  publisher = {Springer}
}