In [229]:
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import matplotlib.cm as cm
import seaborn
seaborn.set()
%matplotlib inline
In [230]:
### Loading dataframes
# All data are available at https://github.com/pajotarthur/ICLR_data
# The paper table's row labelled 0 is discarded right after loading
# (original note: "has nan").
df_paper = pd.read_json('./paper_df.json').drop(index=[0])
df_author = pd.read_json('./author_df.json')
df_inst = pd.read_json('./inst_df.json')
df_country = pd.read_json('./country_df.json')

Average score of the papers, for each type of decision

In [231]:
# Mean review score per decision category.
# Select `review` *before* aggregating: reducing every column of df_paper and
# then discarding all but one is wasted work, and any non-numeric column makes
# the whole-frame aggregation raise in recent pandas versions.
# `.mean()` skips NaN scores instead of propagating them.
groupby_decision = df_paper.groupby('decision')
groupby_decision['review'].mean()
Out[231]:
decision
Accept (Oral)               7.304348
Accept (Poster)             6.497877
Invite to Workshop Track    5.362963
Reject                      4.644313
Name: review, dtype: float64

Histogram of the score

In [232]:
# Build the count table in this cell so it runs on a fresh kernel; previously
# `ag` was only defined by a later cell (NameError under Restart & Run All).
# Rows: review score rounded to one decimal; columns: decision; values: number
# of papers (NaN where a score/decision combination has no paper).
ag = (
    pd.DataFrame({'value': df_paper.review.round(1), 'type': df_paper.decision})
    .groupby(['value', 'type'])
    .size()
    .unstack()
)

ag.plot(kind='bar', colormap=cm.Accent, width=1, figsize=(15, 10), stacked=True)
plt.show()

Histogram of the scores, not stacked. The overlap between the decision categories is substantial.

In [233]:
# One row per paper: its `review` value rounded to one decimal and its
# decision, plus a constant column so that summing it counts papers.
df = pd.DataFrame(
    {'value': df_paper.review.round(1), 'type': df_paper.decision}
).assign(dummy=1)

# Papers per (score, decision); unstacking turns the decisions into columns.
ag = df.groupby(['value', 'type'])['dummy'].sum().unstack()

ag.plot.bar(colormap=cm.Accent, width=1, figsize=(15, 10))
plt.show()

Top author for number of accepted paper

In [234]:
# Top 25 authors ordered by poster count, ties broken by oral count and then
# workshop count (all descending).
(
    df_author
    .sort_values(by=['nb_poster', 'nb_oral', 'nb_workshop'], ascending=False)
    .head(25)
)
Out[234]:
acceptance_rate avg_nb_cohauthor avg_note avg_oral avg_poster avg_reject avg_workshop id institution institution_country nb_oral nb_paper nb_poster nb_reject nb_workshop
Sergey Levine 0.636364 4.272727 5.848485 0.000000 6.333333 4.333333 5.222222 svlevine@eecs.berkeley.edu eecs.berkeley.edu edu 0 11 7 1 3
Yoshua Bengio 0.388889 5.444444 5.481481 0.000000 6.190476 5.000000 5.166667 bengioy@iro.umontreal.ca iro.umontreal.ca ca 0 18 7 9 2
Richard Socher 0.600000 3.400000 6.166667 0.000000 6.722222 5.444444 5.000000 rsocher@salesforce.com salesforce.com com 0 10 6 3 1
Caiming Xiong 0.714286 3.428571 6.190476 0.000000 6.666667 5.000000 0.000000 cxiong@salesforce.com salesforce.com com 0 7 5 2 0
Pieter Abbeel 0.500000 5.500000 6.166667 7.500000 6.333333 5.750000 5.333333 pabbeel@cs.berkeley.edu cs.berkeley.edu edu 2 12 4 4 2
Dawn Song 0.454545 4.363636 5.909091 7.000000 6.583333 5.416667 5.000000 dawnsong@cs.berkeley.edu cs.berkeley.edu edu 1 11 4 4 2
Ian Goodfellow 0.800000 5.200000 5.866667 0.000000 6.333333 0.000000 4.000000 goodfellow@google.com google.com com 0 5 4 0 1
Kyunghyun Cho 0.666667 4.333333 5.777778 0.000000 6.416667 4.500000 0.000000 kyunghyun.cho@nyu.edu nyu.edu edu 0 6 4 2 0
Zachary C. Lipton 0.800000 5.000000 6.266667 0.000000 6.416667 5.666667 0.000000 zlipton@cmu.edu cmu.edu edu 0 5 4 1 0
Max Welling 0.800000 4.000000 6.533333 8.000000 6.777778 4.333333 0.000000 m.welling@uva.nl uva.nl nl 1 5 3 1 0
Ruslan Salakhutdinov 0.500000 3.250000 5.666667 7.333333 7.000000 4.250000 0.000000 rsalakhu@cs.cmu.edu cs.cmu.edu edu 1 8 3 4 0
Adam Trischler 0.600000 5.200000 6.266667 0.000000 6.555556 5.666667 6.000000 adam.trischler@microsoft.com microsoft.com com 0 5 3 1 1
Oriol Vinyals 0.500000 6.500000 6.055556 0.000000 6.555556 5.500000 5.666667 vinyals@google.com google.com com 0 6 3 2 1
Xi Chen 0.750000 6.500000 6.083333 0.000000 6.222222 0.000000 5.666667 peter@openai.com openai.com com 0 4 3 0 1
Animashree Anandkumar 0.750000 4.500000 6.083333 0.000000 6.333333 5.333333 0.000000 anima@caltech.edu caltech.edu edu 0 4 3 1 0
Chelsea Finn 0.750000 4.500000 6.083333 0.000000 6.666667 4.333333 0.000000 cbfinn@eecs.berkeley.edu eecs.berkeley.edu edu 0 4 3 1 0
Demis Hassabis 0.750000 11.000000 6.250000 0.000000 6.222222 6.333333 0.000000 demishassabis@google.com google.com com 0 4 3 1 0
Dengyong Zhou 1.000000 5.333333 6.333333 0.000000 6.333333 0.000000 0.000000 dennyzhou@google.com google.com com 0 3 3 0 0
Douwe Kiela 1.000000 5.333333 7.000000 0.000000 7.000000 0.000000 0.000000 dkiela@fb.com fb.com com 0 3 3 0 0
Jimmy Ba 1.000000 4.000000 6.555556 0.000000 6.555556 0.000000 0.000000 jimmy@psi.toronto.edu psi.toronto.edu edu 0 3 3 0 0
Lior Wolf 1.000000 3.000000 6.111111 0.000000 6.111111 0.000000 0.000000 wolf@fb.com fb.com com 0 3 3 0 0
Ludovic Denoyer 1.000000 4.000000 6.777778 0.000000 6.777778 0.000000 0.000000 ludovic.denoyer@upmc.fr upmc.fr fr 0 3 3 0 0
Lukasz Kaiser 0.500000 5.000000 5.944444 0.000000 7.000000 4.888889 0.000000 lukaszkaiser@google.com google.com com 0 6 3 3 0
Nando de Freitas 0.750000 7.750000 5.500000 0.000000 5.777778 4.666667 0.000000 nandodefreitas@google.com google.com com 0 4 3 1 0
Nicolas Heess 0.600000 8.200000 5.466667 0.000000 6.555556 3.833333 0.000000 heess@google.com google.com com 0 5 3 2 0

Top author for number of submitted paper

In [235]:
# Ten authors with the highest `nb_paper` (descending).
(
    df_author
    .sort_values(by='nb_paper', ascending=False)
    .head(10)
)
Out[235]:
acceptance_rate avg_nb_cohauthor avg_note avg_oral avg_poster avg_reject avg_workshop id institution institution_country nb_oral nb_paper nb_poster nb_reject nb_workshop
Yoshua Bengio 0.388889 5.444444 5.481481 0.000000 6.190476 5.000000 5.166667 bengioy@iro.umontreal.ca iro.umontreal.ca ca 0 18 7 9 2
Pieter Abbeel 0.500000 5.500000 6.166667 7.500000 6.333333 5.750000 5.333333 pabbeel@cs.berkeley.edu cs.berkeley.edu edu 2 12 4 4 2
Dawn Song 0.454545 4.363636 5.909091 7.000000 6.583333 5.416667 5.000000 dawnsong@cs.berkeley.edu cs.berkeley.edu edu 1 11 4 4 2
Sergey Levine 0.636364 4.272727 5.848485 0.000000 6.333333 4.333333 5.222222 svlevine@eecs.berkeley.edu eecs.berkeley.edu edu 0 11 7 1 3
Richard Socher 0.600000 3.400000 6.166667 0.000000 6.722222 5.444444 5.000000 rsocher@salesforce.com salesforce.com com 0 10 6 3 1
Quoc V. Le 0.222222 4.111111 5.148148 0.000000 6.166667 4.444444 5.166667 adai@google.com google.com com 0 9 2 3 4
Ruslan Salakhutdinov 0.500000 3.250000 5.666667 7.333333 7.000000 4.250000 0.000000 rsalakhu@cs.cmu.edu cs.cmu.edu edu 1 8 3 4 0
Caiming Xiong 0.714286 3.428571 6.190476 0.000000 6.666667 5.000000 0.000000 cxiong@salesforce.com salesforce.com com 0 7 5 2 0
Lukasz Kaiser 0.500000 5.000000 5.944444 0.000000 7.000000 4.888889 0.000000 lukaszkaiser@google.com google.com com 0 6 3 3 0
Oriol Vinyals 0.500000 6.500000 6.055556 0.000000 6.555556 5.500000 5.666667 vinyals@google.com google.com com 0 6 3 2 1

Top authors by average note (`avg_note`). We chose (arbitrarily) to keep only the authors who submitted more than three papers.

In [236]:
# Keep authors with more than three papers, then show the ten highest
# `avg_note` values.
(
    df_author
    .loc[df_author.nb_paper > 3]
    .sort_values(by='avg_note', ascending=False)
    .head(10)
)
Out[236]:
acceptance_rate avg_nb_cohauthor avg_note avg_oral avg_poster avg_reject avg_workshop id institution institution_country nb_oral nb_paper nb_poster nb_reject nb_workshop
Igor Mordatch 0.75 5.50 7.000000 7.500000 6.333333 6.666667 0.000000 mordatch@openai.com openai.com com 2 4 1 1 0
Amnon Shashua 0.75 3.00 6.916667 8.000000 6.833333 0.000000 6.000000 shashua@cs.huji.ac.il cs.huji.ac.il il 1 4 2 0 1
Max Welling 0.80 4.00 6.533333 8.000000 6.777778 4.333333 0.000000 m.welling@uva.nl uva.nl nl 1 5 3 1 0
Trevor Darrell 0.50 7.25 6.500000 7.666667 6.666667 6.333333 5.333333 trevor@eecs.berkeley.edu eecs.berkeley.edu edu 1 4 1 1 1
Jason D. Lee 0.50 3.00 6.500000 0.000000 7.500000 5.666667 5.333333 jasonlee@marshall.usc.edu marshall.usc.edu edu 0 4 2 1 1
Bo Li 0.60 5.60 6.466667 7.000000 6.833333 5.833333 0.000000 crystalboli@berkeley.edu berkeley.edu edu 1 5 2 2 0
Warren He 0.50 4.75 6.333333 0.000000 6.833333 5.833333 0.000000 _w@eecs.berkeley.edu eecs.berkeley.edu edu 0 4 2 2 0
Adam Trischler 0.60 5.20 6.266667 0.000000 6.555556 5.666667 6.000000 adam.trischler@microsoft.com microsoft.com com 0 5 3 1 1
Zachary C. Lipton 0.80 5.00 6.266667 0.000000 6.416667 5.666667 0.000000 zlipton@cmu.edu cmu.edu edu 0 5 4 1 0
Arthur Szlam 0.50 5.75 6.250000 0.000000 7.166667 5.333333 0.000000 aszlam@fb.com fb.com com 0 4 2 2 0

Top institution for number of accepted paper.

An institution is credited with 1 paper if all the authors are from that institution. Otherwise the credit is split proportionally: if only 1 of 3 authors is from the institution, it is credited with 1/3 of a paper. The institutions have been scraped from the authors' email addresses. There is probably more work to do; the code is available at https://github.com/pajotarthur/ICLR_data/blob/master/add_paper_origin.ipynb. Emails ending with gmail.com have been removed.

In [237]:
# Top 10 institutions ordered by poster count, ties broken by oral count and
# then workshop count (all descending).
(
    df_inst
    .sort_values(by=['nb_poster', 'nb_oral', 'nb_workshop'], ascending=False)
    .head(10)
)
Out[237]:
acceptance_by_author acceptance_rate avg_note avg_oral avg_poster avg_reject avg_workshop nb_author nb_oral nb_paper nb_poster nb_reject nb_workshop
Google 0.177271 0.560082 5.799223 7.407767 6.419944 4.869080 5.082126 264 3.433333 83.558514 43.366306 28.544589 8.214286
Berkeley 0.184403 0.458882 5.826673 7.361257 6.373487 5.263065 5.209073 79 2.122222 31.746429 12.445635 10.136905 7.041667
Stanford 0.132825 0.374703 5.644172 9.000000 6.838735 4.709716 5.535826 75 1.000000 26.586147 8.961905 14.840909 1.783333
Facebook 0.232381 0.548931 5.773903 5.333333 6.562064 4.647059 5.484211 35 0.166667 14.816667 7.966667 5.100000 1.583333
Carnegie Mellon University 0.116500 0.273579 5.317034 7.454545 6.590441 4.726561 5.442982 76 0.916667 32.363492 7.937302 20.976190 2.533333
Microsoft 0.122770 0.390195 5.735216 7.714286 6.754231 4.868910 5.775910 71 1.166667 22.339286 7.550000 11.639286 1.983333
Toronto Univ. 0.188889 0.533566 6.194420 0.000000 6.666667 5.818182 5.539683 27 0.000000 9.558333 5.100000 1.833333 2.625000
ETH 0.149510 0.442671 5.610063 0.000000 6.666667 4.770833 0.000000 34 0.000000 11.483333 5.083333 6.400000 0.000000
Oxford Univ. 0.183662 0.601182 5.740124 7.000000 6.259895 4.840000 5.666667 29 0.333333 8.859524 4.992857 3.333333 0.200000
Washington Univ. 0.145506 0.477861 5.874401 7.190476 6.674370 5.084555 5.000000 40 0.875000 12.179762 4.945238 5.359524 1.000000

Top institution for number of submitted paper.

In [238]:
# Ten institutions with the highest `nb_paper` (descending).
(
    df_inst
    .sort_values(by='nb_paper', ascending=False)
    .head(10)
)
Out[238]:
acceptance_by_author acceptance_rate avg_note avg_oral avg_poster avg_reject avg_workshop nb_author nb_oral nb_paper nb_poster nb_reject nb_workshop
Google 0.177271 0.560082 5.799223 7.407767 6.419944 4.869080 5.082126 264 3.433333 83.558514 43.366306 28.544589 8.214286
Carnegie Mellon University 0.116500 0.273579 5.317034 7.454545 6.590441 4.726561 5.442982 76 0.916667 32.363492 7.937302 20.976190 2.533333
Berkeley 0.184403 0.458882 5.826673 7.361257 6.373487 5.263065 5.209073 79 2.122222 31.746429 12.445635 10.136905 7.041667
Stanford 0.132825 0.374703 5.644172 9.000000 6.838735 4.709716 5.535826 75 1.000000 26.586147 8.961905 14.840909 1.783333
Microsoft 0.122770 0.390195 5.735216 7.714286 6.754231 4.868910 5.775910 71 1.166667 22.339286 7.550000 11.639286 1.983333
MIT 0.084005 0.262715 5.624772 7.333333 6.607670 5.191318 5.392157 62 0.500000 19.825000 4.708333 10.366667 4.250000
IBM 0.103723 0.305005 5.629649 0.000000 6.815385 5.032326 6.400000 47 0.000000 15.983333 4.875000 10.483333 0.625000
Facebook 0.232381 0.548931 5.773903 5.333333 6.562064 4.647059 5.484211 35 0.166667 14.816667 7.966667 5.100000 1.583333
Washington Univ. 0.145506 0.477861 5.874401 7.190476 6.674370 5.084555 5.000000 40 0.875000 12.179762 4.945238 5.359524 1.000000
ETH 0.149510 0.442671 5.610063 0.000000 6.666667 4.770833 0.000000 34 0.000000 11.483333 5.083333 6.400000 0.000000

Top institutions by acceptance rate. We chose (arbitrarily) to keep only the institutions that submitted more than three papers.

In [239]:
# Keep institutions whose `nb_paper` exceeds three, then show the ten highest
# acceptance rates.
(
    df_inst
    .loc[df_inst.nb_paper > 3]
    .sort_values(by='acceptance_rate', ascending=False)
    .head(10)
)
Out[239]:
acceptance_by_author acceptance_rate avg_note avg_oral avg_poster avg_reject avg_workshop nb_author nb_oral nb_paper nb_poster nb_reject nb_workshop
cs.huji.ac.il 0.464286 0.764706 6.941176 8.000000 6.888889 0.000000 6.000000 7 1.000000 4.250000 2.250000 0.000000 1.000000
Intel 0.120000 0.750000 6.250000 0.000000 6.777778 4.666667 0.000000 25 0.000000 4.000000 3.000000 1.000000 0.000000
Cambridge 0.247059 0.731343 5.941128 0.000000 6.190476 5.262346 0.000000 17 0.000000 5.742857 4.200000 1.542857 0.000000
UPMC 0.236667 0.702970 6.254125 0.000000 6.643192 5.333333 0.000000 10 0.000000 3.366667 2.366667 1.000000 0.000000
Salesforce 0.853333 0.654731 6.197783 0.000000 6.718750 5.269841 5.000000 5 0.000000 6.516667 4.266667 1.750000 0.500000
NVIDIA 0.165404 0.652824 5.533776 5.666667 6.413793 4.350877 0.000000 18 1.000000 4.560606 1.977273 1.583333 0.000000
ttic.edu 0.291667 0.630631 6.534535 0.000000 7.095238 5.333333 5.809524 8 0.000000 3.700000 2.333333 0.666667 0.700000
Georgia Tech 0.207639 0.611452 5.588957 5.666667 6.143219 4.777778 5.333333 24 0.600000 8.150000 4.383333 3.000000 0.166667
Oxford Univ. 0.183662 0.601182 5.740124 7.000000 6.259895 4.840000 5.666667 29 0.333333 8.859524 4.992857 3.333333 0.200000
Princeton 0.200521 0.587786 5.725191 0.000000 6.363636 4.400000 5.333333 16 0.000000 5.458333 3.208333 1.250000 1.000000

Decision bar plot, for each institution

In [240]:
# Stacked decision counts for institutions with `nb_poster` above one,
# ordered by poster count.
(
    df_inst
    .loc[df_inst.nb_poster > 1, ['nb_poster', 'nb_reject', 'nb_oral', 'nb_workshop']]
    .sort_values(by='nb_poster')
    .plot.bar(figsize=(15, 10), legend=True, fontsize=15, stacked=True)
)
Out[240]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60e2e674e0>

Bar plot of the number of accepted paper by institution

In [241]:
# Poster counts for institutions with `nb_poster` above one, smallest first.
(
    df_inst
    .loc[df_inst.nb_poster > 1, 'nb_poster']
    .sort_values()
    .plot.bar(figsize=(15, 10), legend=False, fontsize=20)
)
Out[241]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60e1c4d668>

Bar plot of the acceptance per author (`acceptance_by_author`), by institution

In [242]:
# `acceptance_by_author` for institutions with `nb_poster` above one,
# smallest first.
(
    df_inst
    .loc[df_inst.nb_poster > 1, 'acceptance_by_author']
    .sort_values()
    .plot.bar(figsize=(15, 10), legend=False, fontsize=20)
)
Out[242]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60e1bfd9e8>

Bar plot of acceptance rate by institution

In [244]:
# Acceptance rate for institutions with `nb_poster` above one, smallest first.
(
    df_inst
    .loc[df_inst.nb_poster > 1, 'acceptance_rate']
    .sort_values()
    .plot.bar(figsize=(15, 10), legend=False, fontsize=20)
)
Out[244]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60e19ddb38>

Top country for number of accepted paper.

In [175]:
# Top 10 countries (email-domain suffixes) ordered by poster count, ties broken
# by oral count and then workshop count (all descending).
(
    df_country
    .sort_values(by=['nb_poster', 'nb_oral', 'nb_workshop'], ascending=False)
    .head(10)
)
Out[175]:
acceptance_rate avg_note avg_oral avg_poster avg_reject avg_workshop nb_oral nb_paper nb_poster nb_reject nb_workshop
com 0.407407 5.512671 7.145833 6.485320 4.724359 5.310606 16 513 193 260 44
edu 0.392377 5.532885 7.238095 6.501035 4.752252 5.401361 14 446 161 222 49
uk 0.390625 5.328125 0.000000 6.280000 4.648649 6.000000 0 64 25 37 2
ca 0.354167 5.236111 8.000000 6.604167 4.240000 5.277778 1 48 16 25 6
fr 0.428571 5.654762 0.000000 6.750000 4.904762 4.333333 0 28 12 14 2
de 0.289474 5.368421 8.000000 6.500000 4.782609 5.250000 1 38 10 23 4
ch 0.321429 5.309524 0.000000 6.370370 4.705882 5.666667 0 28 9 17 2
org 0.346154 5.525641 0.000000 6.629630 4.888889 5.333333 0 26 9 15 2
cn 0.200000 5.180000 6.555556 6.523810 4.644444 5.433333 3 50 7 30 10
jp 0.428571 5.714286 7.166667 6.571429 4.814815 5.444444 2 21 7 9 3

Top country for number of submitted paper.

In [176]:
# Ten countries with the highest `nb_paper` (descending).
(
    df_country
    .sort_values(by='nb_paper', ascending=False)
    .head(10)
)
Out[176]:
acceptance_rate avg_note avg_oral avg_poster avg_reject avg_workshop nb_oral nb_paper nb_poster nb_reject nb_workshop
com 0.407407 5.512671 7.145833 6.485320 4.724359 5.310606 16 513 193 260 44
edu 0.392377 5.532885 7.238095 6.501035 4.752252 5.401361 14 446 161 222 49
uk 0.390625 5.328125 0.000000 6.280000 4.648649 6.000000 0 64 25 37 2
cn 0.200000 5.180000 6.555556 6.523810 4.644444 5.433333 3 50 7 30 10
ca 0.354167 5.236111 8.000000 6.604167 4.240000 5.277778 1 48 16 25 6
de 0.289474 5.368421 8.000000 6.500000 4.782609 5.250000 1 38 10 23 4
fr 0.428571 5.654762 0.000000 6.750000 4.904762 4.333333 0 28 12 14 2
ch 0.321429 5.309524 0.000000 6.370370 4.705882 5.666667 0 28 9 17 2
org 0.346154 5.525641 0.000000 6.629630 4.888889 5.333333 0 26 9 15 2
jp 0.428571 5.714286 7.166667 6.571429 4.814815 5.444444 2 21 7 9 3

Top country by acceptance rate.

In [177]:
# Keep countries with more than five papers, then show the ten highest
# acceptance rates.
(
    df_country
    .loc[df_country.nb_paper > 5]
    .sort_values(by='acceptance_rate', ascending=False)
    .head(10)
)
Out[177]:
acceptance_rate avg_note avg_oral avg_poster avg_reject avg_workshop nb_oral nb_paper nb_poster nb_reject nb_workshop
nl 0.583333 5.916667 8.000000 6.944444 4.266667 0.000000 1 12 6 5 0
au 0.571429 5.857143 7.000000 6.333333 5.000000 0.000000 1 7 3 3 0
il 0.500000 6.133333 8.000000 6.833333 4.888889 5.666667 1 10 4 3 2
jp 0.428571 5.714286 7.166667 6.571429 4.814815 5.444444 2 21 7 9 3
fr 0.428571 5.654762 0.000000 6.750000 4.904762 4.333333 0 28 12 14 2
com 0.407407 5.512671 7.145833 6.485320 4.724359 5.310606 16 513 193 260 44
edu 0.392377 5.532885 7.238095 6.501035 4.752252 5.401361 14 446 161 222 49
uk 0.390625 5.328125 0.000000 6.280000 4.648649 6.000000 0 64 25 37 2
kr 0.375000 5.187500 0.000000 6.388889 4.370370 5.333333 0 16 6 9 1
ca 0.354167 5.236111 8.000000 6.604167 4.240000 5.277778 1 48 16 25 6

Decision bar plot, for each country

In [180]:
# Stacked decision counts for countries with `nb_poster` above one, ordered by
# poster count.
(
    df_country
    .loc[df_country.nb_poster > 1, ['nb_poster', 'nb_reject', 'nb_oral', 'nb_workshop']]
    .sort_values(by='nb_poster')
    .plot.bar(figsize=(15, 10), legend=True, fontsize=15, stacked=True)
)
Out[180]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60e401b358>

Bar plot of the number of accepted paper by country

In [179]:
# Accepted (poster) papers per country, smallest first.
# The caption for this cell is about *accepted* papers, but the code used to
# filter on and plot `nb_paper` (all submissions). Use `nb_poster`, the same
# column and threshold as the institution-level accepted-paper plot.
# NOTE(review): if submissions were intended, revert to `nb_paper` and fix the
# caption instead.
(
    df_country
    .loc[df_country.nb_poster > 1, 'nb_poster']
    .sort_values()
    .plot.bar(figsize=(15, 10), legend=False, fontsize=25)
)
Out[179]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f60e438b4a8>