In [10]:
import pandas as pd
import numpy as np
%reload_ext autoreload
%autoreload 2
In [218]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go

from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *
import numpy as np

# Initialise plotly's offline notebook mode so the iplot() calls below can
# render figures inline. NOTE(review): per the plotly docs, connected=True
# loads plotly.js from a CDN, so viewing the figures needs network access.
init_notebook_mode(connected=True)
In [219]:
# ICML comes from a single CSV; the NIPS table is split over two files that
# are read with read_csv (despite the .bin extension) and stacked row-wise.
df_icml = pd.read_csv("./ICML.csv")
df_nips = pd.concat(
    [pd.read_csv(part) for part in ("./autor_title1.bin", "./autor_title2.bin")]
)
In [220]:
def df_to_list(df):
    """Parse the ``l_author`` column of *df* into per-paper and flat author lists.

    Each ``l_author`` cell is expected to hold the textual form of a Python
    list of ``(author_name, institution)`` pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``l_author`` column. Rows containing any missing value
        are dropped before parsing.

    Returns
    -------
    l_paper : list
        One parsed list of pairs per remaining row.
    l_author : numpy.ndarray
        Every pair of every paper, flattened, as ``(name[1:], institution)``;
        pairs whose raw name has two characters or fewer are skipped.

    Raises
    ------
    ValueError, SyntaxError
        If a cell is not a plain Python literal.
    """
    # The cell strings come from files on disk, so they are parsed as literals
    # rather than executed with eval(). The import is local so the function
    # stays self-contained.
    import ast

    l_paper = [ast.literal_eval(paper) for paper in df.dropna().l_author]
    l_author = np.array([
        # The first character of the raw name is dropped (presumably a leading
        # space left by the scraper -- confirm against the data).
        (pair[0][1:], pair[1])
        for paper in l_paper
        for pair in paper
        if len(pair[0]) > 2
    ])
    return l_paper, l_author
In [221]:
# Parse the NIPS table into per-paper author lists and a flat author array.
# NOTE(review): l_paper / l_author are not referenced again in the visible
# cells; the same call is repeated further down as l_paper_nips / l_author_nips.
l_paper, l_author = df_to_list(df_nips)
In [222]:
def _rule(label, *needles, unless=(), endswith=(), words=()):
    """Bundle one affiliation-normalisation rule for ``good_inst`` into a tuple."""
    return label, needles, unless, endswith, words


# Ordered rule table: the order here is the order of labels in good_inst's result.
#   needles  -- the rule fires when any of these is a substring of the affiliation
#   unless   -- ...but not when any of these is a substring
#   endswith -- the rule also fires when the affiliation ends with one of these
#   words    -- the rule also fires when one of these is a whole word
_INST_RULES = (
    _rule('DeepMind', 'Deep'),
    _rule('Berkeley', 'Berkeley'),
    _rule('Google', 'Google', unless=('Deep',)),
    _rule('CMU', 'CMU', 'Carnegie'),
    _rule('Stanford University', 'Stan', 'stan'),
    _rule('MIT', 'MIT', 'Massachusetts In'),
    _rule('Oxford', 'Oxford'),
    _rule('EPFL', 'EPFL', 'Lausanne'),
    _rule('Microsoft', 'Microsoft'),
    _rule('University of Toronto', 'Toronto'),
    _rule('Duke University', 'Duke'),
    _rule('Tsinghua University', 'Tsinghua'),
    _rule('Cornell University', 'Cornell'),
    _rule('Columbia University', 'Columbia', unless=('British',)),
    _rule('Princeton University', 'Princeton'),
    _rule('ETH Zurich', 'ETH', 'Zurich'),
    _rule('Harvard University', 'Harvard'),
    _rule('Purdue University', 'Purdue'),
    _rule('Rice University', 'Rice'),
    _rule('University of Virginia', 'Virginia'),
    _rule('University of Cambridge', 'Cambridge'),
    _rule('Georgia Tech', 'Georgia'),
    _rule('University of Southern California', 'USC', 'Southern'),
    _rule('Tencent AI Lab', 'Tencent', 'Tecent'),
    _rule('University of Texas', 'Texas'),
    _rule('FAIR', 'Facebook', 'FAIR'),
    _rule('Caltech', 'Caltech'),
    _rule('University of Wisconsin-Madison', 'Wisconsin'),
    _rule('Brown University', 'Brown'),
    _rule('Aalto University', 'Aalto'),
    _rule('INRIA', 'INRIA', 'Inria'),
    _rule('Technion', 'Technion'),
    _rule('Amazon', 'Amazon'),
    _rule('New York University', 'York', 'NYU'),
    _rule('University of California, Davis', 'Davis'),
    _rule('UCLA', 'UCLA', 'Angeles'),
    _rule('University of Michigan', 'Michigan'),
    _rule('Weizmann Institute of Science', 'Weizmann'),
    _rule('Yale University', 'Yale'),
    _rule('McGill University', 'Gill'),
    _rule('University of Chicago', 'University of Chicago', 'U. Chicago'),
    _rule('University of Montreal', 'Montreal', 'MILA'),
    _rule('Boston University', 'Boston'),
    _rule('Telecom ParisTech', 'Telecom'),
    _rule('The Ohio State University', 'Ohio'),
    _rule('The University of Iowa', 'Iowa'),
    _rule('UT Austin', 'Austin'),
    _rule('University of Edinburgh', 'Edinburgh'),
    _rule('MPI', 'MPI', 'Max'),
    _rule('University of Illinois at Urbana-Champaign', 'UIUC', 'Urbana'),
    _rule('IBM Research', 'IBM'),
    _rule('Baidu Research', 'Baidu'),
    _rule('CNRS', 'CNRS'),
    _rule('RIKEN', 'RIKEN'),
    # Previously this rule appended 'RIKEN' (copy-paste slip) and matched by
    # substring, so any "...Intelligence..." affiliation was credited to RIKEN.
    # It now yields 'Intel' and only fires on the standalone word.
    _rule('Intel', words=('Intel', 'intel')),
    _rule('Open AI', 'Open'),
    _rule('IIT', 'IIT', 'Indian Institute'),
    _rule('ENS', 'Normale', 'ENS ', endswith=('ENS',)),
    _rule('Adobe', 'Adobe'),
    _rule('Alibaba', 'Alibaba'),
    _rule('Data61', 'Data61'),
    _rule('TU Delft', 'Delft'),
)


def good_inst(inst):
    """Map a free-text affiliation string to canonical institution labels.

    Every rule in ``_INST_RULES`` is tried in order, and the label of each rule
    that fires is collected, so one affiliation can yield several labels.
    Matching is case-sensitive substring search (except where a rule says
    otherwise), so short triggers can also fire on unrelated names.

    Parameters
    ----------
    inst : str
        Raw affiliation text.

    Returns
    -------
    list of str
        Canonical labels in rule-table order, or ``[inst]`` unchanged when no
        rule fires.
    """
    import re  # local import: only needed for the whole-word rules

    tokens = set(re.findall(r'\w+', inst))
    l_inst = []
    for label, needles, unless, endswith, words in _INST_RULES:
        fired = (
            any(needle in inst for needle in needles)
            or inst.endswith(endswith)  # an empty tuple never matches
            or any(word in tokens for word in words)
        )
        if fired and not any(veto in inst for veto in unless):
            l_inst.append(label)
    return l_inst or [inst]
In [223]:
def count_author(l_author):
    """Rank the distinct values in column 0 of *l_author* by frequency.

    Returns the distinct names as an array (most frequent first) together
    with a list of their occurrence counts in the same descending order.
    """
    names, freqs = np.unique(l_author[:, 0], return_counts=True)
    descending = np.argsort(-freqs)  # negate so the largest count comes first
    return names[descending], sorted(freqs, reverse=True)

def count_inst(l_author):
    """Rank the distinct values in column 1 of *l_author* by frequency.

    The affiliation strings are counted as-is (no normalisation is applied
    here). Returns the distinct strings as an array (most frequent first)
    together with a list of their counts in the same descending order.
    """
    labels, freqs = np.unique(l_author[:, 1], return_counts=True)
    descending = np.argsort(-freqs)  # negate so the largest count comes first
    return labels[descending], sorted(freqs, reverse=True)

def inst_by_paper(l_paper):
    """Accumulate a fractional paper score per normalised institution.

    Each paper carries a total weight of 1: it is split evenly over the
    paper's authors, and each author's share is split evenly over the labels
    that ``good_inst`` returns for that author's affiliation (element 1 of
    the author entry).

    Returns a list of ``(label, score)`` pairs ordered by ascending score.
    """
    scores = {}
    for authors in l_paper:
        n_authors = len(authors)
        for author in authors:
            labels = good_inst(author[1])
            n_labels = len(labels)
            for label in labels:
                share = 1/(n_authors*n_labels)
                scores[label] = scores[label] + share if label in scores else share
    # A stable ascending sort over the reversed items gives the same order as
    # sorting descending and then reversing the result.
    ranked = sorted(list(scores.items())[::-1], key=lambda pair: pair[1])
    return ranked
In [224]:
# Parse each conference table into (per-paper author lists, flat author array).
l_paper_icml, l_author_icml = df_to_list(df_icml)
l_paper_nips, l_author_nips = df_to_list(df_nips)
In [225]:
# ICML: author frequencies, raw-affiliation frequencies, and the fractional
# per-paper score of each normalised institution.
author_unique_icml, count_unique_a_icml = count_author(l_author_icml)
inst_unique_icml, count_unique_i_icml = count_inst(l_author_icml)
list_inst_by_paper_icml = inst_by_paper(l_paper_icml)
In [226]:
# NIPS: author frequencies, raw-affiliation frequencies, and the fractional
# per-paper score of each normalised institution.
author_unique_nips, count_unique_a_nips = count_author(l_author_nips)
inst_unique_nips, count_unique_i_nips = count_inst(l_author_nips)
list_inst_by_paper_nips = inst_by_paper(l_paper_nips)
In [197]:
# One frame per conference: distinct author name plus its occurrence count.
# Both share the "author_unique" column so they can be merged on it.
df_a_nips = pd.DataFrame(dict(
    author_unique=author_unique_nips,
    count_unique_nips=count_unique_a_nips,
))
df_a_icml = pd.DataFrame(dict(
    author_unique=author_unique_icml,
    count_unique_icml=count_unique_a_icml,
))
In [261]:
# Outer-merge the two author tables; an author missing from one conference
# gets a count of 0 there. The 1.6 weight on ICML is presumably the ratio of
# NIPS to ICML paper volume (the plots call it "normalised") -- TODO confirm.
df_a = (
    df_a_icml
    .merge(df_a_nips, how="outer")
    .fillna(0)
    .assign(
        count_unique=lambda t: t["count_unique_icml"] + t["count_unique_nips"],
        count_unique_norm=lambda t: t["count_unique_icml"] * 1.6 + t["count_unique_nips"],
    )
)
In [262]:
# One frame per conference: distinct raw affiliation plus its occurrence count.
# NOTE: the key column is called "author_unique" even though it holds
# institution strings here.
df_i_nips = pd.DataFrame(dict(
    author_unique=inst_unique_nips,
    count_unique_nips=count_unique_i_nips,
))
df_i_icml = pd.DataFrame(dict(
    author_unique=inst_unique_icml,
    count_unique_icml=count_unique_i_icml,
))
In [263]:
# Outer-merge the two institution tables. An institution seen at only one
# conference has NaN in the other count column after the merge; fill with 0
# (as the author and per-paper merges do) so the totals below are not NaN.
df_i = df_i_icml.merge(df_i_nips, how="outer").fillna(0)
df_i["count_unique"] = df_i["count_unique_icml"] + df_i["count_unique_nips"]
# ICML is weighted by 1.6, the same factor used for the author totals.
df_i["count_unique_norm"] = df_i["count_unique_icml"] * 1.6 + df_i["count_unique_nips"]
In [264]:
# np.array() coerces the mixed (label, score) tuples into one array of
# strings, which is why the score column is cast back to float below.
list_inst_by_paper_icml = np.array(list_inst_by_paper_icml)
list_inst_by_paper_nips = np.array(list_inst_by_paper_nips)

df_ibp_nips = pd.DataFrame(dict(
    inst=list_inst_by_paper_nips[:, 0],
    nb_paper_nips=list_inst_by_paper_nips[:, 1].astype("float"),
))
df_ibp_icml = pd.DataFrame(dict(
    inst=list_inst_by_paper_icml[:, 0],
    nb_paper_icml=list_inst_by_paper_icml[:, 1].astype("float"),
))
In [265]:
# Outer-merge the per-paper institution scores of both conferences on "inst";
# a conference where the institution is absent contributes 0. The normalised
# total weights ICML by 1.6 (presumably the NIPS/ICML volume ratio -- TODO confirm).
df = (
    df_ibp_icml
    .merge(df_ibp_nips, how="outer")
    .fillna(0)
    .assign(
        count_unique=lambda t: t["nb_paper_nips"] + t["nb_paper_icml"],
        count_unique_norm=lambda t: t["nb_paper_icml"] * 1.6 + t["nb_paper_nips"],
    )
)
In [266]:
# Top institutions by normalised score, NIPS and ICML bars side by side.
nb = 20
df = df.sort_values(by="count_unique_norm", ascending=False)

data_nips = [go.Bar(name="NIPS", x=df["inst"].head(nb), y=df["nb_paper_nips"].head(nb))]
# ICML bars carry the same 1.6 factor that count_unique_norm applies.
data_icml = [go.Bar(name="ICML", x=df["inst"].head(nb), y=df["nb_paper_icml"].head(nb) * 1.6)]

layout = go.Layout(
    title='NIPS and ICML number of paper by institution (2018, normalised by number of paper)',
    xaxis={"tickangle": -30},
    barmode='group',
)
fig = go.Figure(data=data_nips + data_icml, layout=layout)
iplot(fig)
In [248]:
# Same ranking (by normalised score) as the normalised chart, but plotting the
# unscaled ICML values next to NIPS.
nb = 20
df = df.sort_values(by="count_unique_norm", ascending=False)

data_nips = [go.Bar(name="NIPS", x=df["inst"].head(nb), y=df["nb_paper_nips"].head(nb))]
data_icml = [go.Bar(name="ICML", x=df["inst"].head(nb), y=df["nb_paper_icml"].head(nb))]

layout = go.Layout(
    title='NIPS and ICML number of paper by institution (2018)',
    xaxis={"tickangle": -30},
    barmode='group',
)
fig = go.Figure(data=data_nips + data_icml, layout=layout)
iplot(fig)
In [267]:
# NIPS only: the 25 institutions with the highest NIPS score.
nb = 25
df = df.sort_values(by="nb_paper_nips", ascending=False)

data_nips = [go.Bar(name="NIPS", x=df["inst"].head(nb), y=df["nb_paper_nips"].head(nb))]

layout = go.Layout(
    title='NIPS  number of paper by institution (2018)',
    xaxis={"tickangle": -30},
    barmode='group',
)
fig = go.Figure(data=data_nips, layout=layout)
iplot(fig)
In [269]:
# Top 30 authors by combined count, with NIPS and ICML stacked per author.
nb = 30
df_a = df_a.sort_values(by="count_unique", ascending=False)

data_nips = [go.Bar(name="NIPS", x=df_a["author_unique"].head(nb), y=df_a["count_unique_nips"].head(nb))]
data_icml = [go.Bar(name="ICML", x=df_a["author_unique"].head(nb), y=df_a["count_unique_icml"].head(nb))]

layout = go.Layout(
    title='NIPS and ICML top authors (2018)',
    xaxis={"tickangle": -30},
    barmode='stack',
)
fig = go.Figure(data=data_nips + data_icml, layout=layout)
iplot(fig)
In [270]:
# NIPS only: the 40 authors with the highest NIPS count.
nb = 40
df_a = df_a.sort_values(by="count_unique_nips", ascending=False)

data_nips = [go.Bar(name="NIPS", x=df_a["author_unique"].head(nb), y=df_a["count_unique_nips"].head(nb))]

layout = go.Layout(
    title='NIPS top authors (2018)',
    xaxis={"tickangle": -30},
    barmode='stack',
)
fig = go.Figure(data=data_nips, layout=layout)
iplot(fig)
In [260]:
# Show the author table ranked by NIPS count (the result is displayed, not stored).
# NOTE(review): the recorded output below contains NaN in the ICML/total
# columns, while the cell that builds df_a applies fillna(0); this output
# presumably comes from a run made before that fill was added -- re-run to refresh.
df_a.sort_values("count_unique_nips", ascending=False)
Out[260]:
author_unique count_unique_icml count_unique_nips count_unique count_unique_norm
1696 Josh Tenenbaum NaN 10.0 NaN NaN
21 Michael Jordan 4.0 8.0 12.0 14.4
1697 Francis Bach NaN 7.0 NaN NaN
1 Sergey Levine 6.0 7.0 13.0 16.6
233 Mingyuan Zhou 2.0 6.0 8.0 9.2
53 Masashi Sugiyama 3.0 6.0 9.0 10.8
4 Eric Xing 5.0 6.0 11.0 14.0
199 Zeyuan Allen-Zhu 2.0 6.0 8.0 9.2
41 Wei Liu 3.0 5.0 8.0 9.8
99 Bo Dai 2.0 5.0 7.0 8.2
113 Honglak Lee 2.0 5.0 7.0 8.2
218 Zhi-Hua Zhou 2.0 5.0 7.0 8.2
1700 Jiajun Wu NaN 5.0 NaN NaN
863 Tuomas Sandholm 1.0 5.0 6.0 6.6
256 Yoshua Bengio 2.0 5.0 7.0 8.2
67 Stefano Ermon 3.0 5.0 8.0 9.8
1698 Bill Freeman NaN 5.0 NaN NaN
26 Pradeep Ravikumar 3.0 5.0 8.0 9.8
1699 Alessandro Rudi NaN 5.0 NaN NaN
1701 Lorenzo Rosasco NaN 5.0 NaN NaN
1704 Zhanxing Zhu NaN 4.0 NaN NaN
1703 Jonathan W Pillow NaN 4.0 NaN NaN
1702 Tieyan Liu NaN 4.0 NaN NaN
385 Nati Srebro 1.0 4.0 5.0 5.6
1530 Ian Goodfellow 1.0 4.0 5.0 5.6
1269 Armando Solar-Lezama 1.0 4.0 5.0 5.6
1394 Jure Leskovec 1.0 4.0 5.0 5.6
1445 Massimiliano Pontil 1.0 4.0 5.0 5.6
1705 Wotao Yin NaN 4.0 NaN NaN
729 Stefanie Jegelka 1.0 4.0 5.0 5.6
... ... ... ... ... ...
1658 Jaesik Choi 1.0 NaN NaN NaN
1659 J. Smith 1.0 NaN NaN NaN
1660 Itzhak Tamo 1.0 NaN NaN NaN
1661 Itay Safran 1.0 NaN NaN NaN
1662 Ion Stoica 1.0 NaN NaN NaN
1663 Ioannis Mitliagkas 1.0 NaN NaN NaN
1664 Ioannis Antonoglou 1.0 NaN NaN NaN
1665 Inst. of Technology Carlo Fischione 1.0 NaN NaN NaN
1666 Ingmar Posner 1.0 NaN NaN NaN
1667 Ilya Tolstikhin 1.0 NaN NaN NaN
1668 Ilja Kuzborskij 1.0 NaN NaN NaN
1670 Jaime Fisac 1.0 NaN NaN NaN
1672 Jakob Verbeek 1.0 NaN NaN NaN
1674 Jean Feng 1.0 NaN NaN NaN
1675 Jayesh Gupta 1.0 NaN NaN NaN
1677 Javier González 1.0 NaN NaN NaN
1678 Jason Pacheco 1.0 NaN NaN NaN
1679 Jason Hartford 1.0 NaN NaN NaN
1681 Jane Wang 1.0 NaN NaN NaN
1682 Jan-Hendrik Lange 1.0 NaN NaN NaN
1683 Jan Peters 1.0 NaN NaN NaN
1684 Jan Kleindienst 1.0 NaN NaN NaN
1685 Jamie Smith 1.0 NaN NaN NaN
1687 James Martens 1.0 NaN NaN NaN
1688 James Lucas 1.0 NaN NaN NaN
1691 James Bailey 1.0 NaN NaN NaN
1692 Jakub Tomczak 1.0 NaN NaN NaN
1693 Jakub Tarnawski 1.0 NaN NaN NaN
1694 Jeff Dean 1.0 NaN NaN NaN
1695 xue wang 1.0 NaN NaN NaN

4131 rows × 5 columns