Making networkx graphs from source-target DataFrames

Imports/setup

Let’s just get all of this out of the way up top.

%matplotlib inline

import pandas as pd
import networkx as nx

# Ignore matplotlib warnings
import warnings
warnings.filterwarnings("../ignore")

Let’s deal with our data!

First, read it in as a normal dataframe

# Load the membership table: one row per (name, club) pairing
df = pd.read_csv(filepath_or_buffer="clubs.csv")
# Peek at the first five rows to check the columns
df.head(n=5)
name club
0 Adams John North Caucus
1 Adams John Long Room Club
2 Adams Samuel North Caucus
3 Adams Samuel Long Room Club
4 Adams Samuel Boston Committee

Convert it to a graph

Each row is an edge with a source (the `name` column) and a target (the `club` column). networkx will automatically create the nodes for you from the values it finds in those two columns.

# Build a graph with one edge per row, linking each 'name' to its 'club'.
# from_pandas_dataframe was renamed to from_pandas_edgelist in the
# networkx 2.x series and the old name no longer exists, so use the new one;
# it takes the same source/target arguments.
g = nx.from_pandas_edgelist(df, source='name', target='club')
# Quick default drawing, no styling yet
nx.draw(g)

png

Horrifying. What if I wanted to build one where I highlighted the clubs?

Let’s build a nice visualization

It’s going to take a little work and a few list comprehensions, but we’ll be okay!

# Collect every distinct club name once (in order of first appearance);
# the drawing code further down reuses this list
clubs = df['club'].unique().tolist()
clubs
['North Caucus',
 'Long Room Club',
 'Boston Committee',
 'London Enemies',
 'St Andrews Lodge',
 'Loyal Nine',
 'Tea Party']
# Collect every distinct person name once (in order of first appearance);
# the drawing code further down reuses this list
people = df['name'].unique().tolist()
people
['Adams John',
 'Adams Samuel',
 'Allen Dr',
 'Appleton Nathaniel',
 'Ash Gilbert',
 'Austin Benjamin',
 'Austin Samuel',
 'Avery John',
 'Baldwin Cyrus',
 'Ballard John',
 'Barber Nathaniel',
 'Barnard Samuel',
 'Barrett Samuel',
 'Bass Henry',
 'Bell William',
 'Blake Increase',
 'Boit John',
 'Bolter Thomas',
 'Boyer Peter',
 'Boynton Richard',
 'Brackett Jos',
 'Bradford John',
 'Bradlee David',
 'Bradlee Josiah',
 'Bradlee Nathaniel',
 'Bradlee Thomas',
 'Bray George',
 'Breck William',
 'Bewer James',
 'Brimmer Herman',
 'Brimmer Martin',
 'Broomfield Henry',
 'Brown Hugh',
 'Brown Enoch',
 'Brown John',
 'Bruce Stephen',
 'Burbeck Edward',
 'Burbeck William',
 'Burt Benjamin',
 'Burton Benjamin',
 'Cailleteau Edward',
 'Callendar Elisha',
 'Campbell Nicholas',
 'Cazneau Capt',
 'Chadwell Mr',
 'Champney Caleb',
 'Chase Thomas',
 'Cheever Ezekiel',
 'Chipman Seth',
 'Chrysty Thomas',
 'Church Benjamin',
 'Clarke Benjamin',
 'Cleverly Stephen',
 'Cochran John',
 'Colesworthy Gilbert',
 'Collier Gershom',
 'Collins Ezra',
 'Collson Adam',
 'Condy JamesFoster',
 'Cooper Samuel',
 'Cooper William',
 'Crafts Thomas',
 'Crane John',
 'Davis Caleb',
 'Davis Edward',
 'Davis Robert',
 'Davis William',
 'Dawes Thomas',
 'Dennie William',
 'Deshon Moses',
 'Dexter Samuel',
 'Dolbear Edward',
 'Doyle Peter',
 'Eaton Joseph',
 'Eayres Joseph',
 'Eckley Unknown',
 'Edes Benjamin',
 'Emmes Samuel',
 'Etheridge William',
 'Fenno Samuel',
 'Ferrell Ambrose',
 'Field Joseph',
 'Flagg Josiah',
 'Fleet Thomas',
 'Foster Bos',
 'Foster Samuel',
 'Frothingham Nathaniel',
 'Gammell John',
 'Gill Moses',
 'Gore Samuel',
 'Gould William',
 'Graham James',
 'Grant Moses',
 'Gray Wait',
 'Greene Nathaniel',
 'Greenleaf Joseph',
 'Greenleaf William',
 'Greenough Newn',
 'Ham William',
 'Hammond Samuel',
 'Hancock Eben',
 'Hancock John',
 'Hendley William',
 'Hewes George',
 'Hickling William',
 'Hicks John',
 'Hill Alexander',
 'Hitchborn Nathaniel',
 'Hitchborn Thomas',
 'Hobbs Samuel',
 'Hoffins John',
 'Holmes Nathaniel',
 'Hooton John',
 'Hopkins Caleb',
 'Hoskins William',
 'Howard Samuel',
 'Howe Edward',
 'Hunnewell Jonathan',
 'Hunnewell Richard',
 'Hunstable Thomas',
 'Hunt Abraham',
 'Ingersoll Daniel',
 'Inglish Alexander',
 'Isaac Pierce',
 'Ivers James',
 'Jarvis Edward',
 'Jarvis Charles',
 'Jefferds Unknown',
 'Jenkins John',
 'Johnston Eben',
 'Johonnott Gabriel',
 'Kent Benjamin',
 'Kerr Walter',
 'Kimball Thomas',
 'Kinnison David',
 'Lambert John',
 'Lee Joseph',
 'Lewis Phillip',
 'Lincoln Amos',
 'Loring Matthew',
 'Lowell John',
 'Machin Thomas',
 'Mackay William',
 'MacKintosh Capt',
 'MacNeil Archibald',
 'Marett Phillip',
 'Marlton John',
 'Marshall Thomas',
 'Marson John',
 'Mason Jonathan',
 'Matchett John',
 'May John',
 'McAlpine William',
 'Melville Thomas',
 'Merrit John',
 'Milliken Thomas',
 'Molineux William',
 'Moody Samuel',
 'Moore Thomas',
 'Morse Anthony',
 'Morton Perez',
 'Mountford Joseph',
 'Newell Eliphelet',
 'Nicholls Unknown',
 'Noyces Nat',
 'Obear Israel',
 'Otis James',
 'Palfrey William',
 'Palmer Joseph',
 'Palms Richard',
 'Parker Jonathan',
 'Parkman Elias',
 'Partridge Sam',
 'Payson Joseph',
 'Pearce IsaacJun',
 'Pearce Isaac',
 'Peck Samuel',
 'Peck Thomas',
 'Peters John',
 'Phillips John',
 'Phillips Samuel',
 'Phillips William',
 'Pierce William',
 'Pierpont Robert',
 'Pitts John',
 'Pitts Lendall',
 'Pitts Samuel',
 'Porter Thomas',
 'Potter Edward',
 'Powell William',
 'Prentiss Henry',
 'Prince John',
 'Prince Job',
 'Proctor Edward',
 'Pulling John',
 'Pulling Richard',
 'Purkitt Henry',
 'Quincy Josiah',
 'Randall John',
 'Revere Paul',
 'Roby Joseph',
 'Roylson Thomas',
 'Ruddock Abiel',
 'Russell John',
 'Russell William',
 'Sessions Robert',
 'Seward James',
 'Sharp Gibbens',
 'Shed Joseph',
 'Sigourney John',
 'Simpson Benjamin',
 'Slater Peter',
 'Sloper Ambrose',
 'Smith John',
 'Spear Thomas',
 'Sprague Samuel',
 'Spurr John',
 'Stanbridge Henry',
 'Starr James',
 'Stearns Phineas',
 'Stevens Ebenezer',
 'Stoddard Asa',
 'Stoddard Jonathan',
 'Story Elisha',
 'Swan James',
 'Sweetser John',
 'Symmes Eben',
 'Symmes John',
 'Tabor Philip',
 'Tileston Thomas',
 'Trott George',
 'Tyler Royall',
 'Urann Thomas',
 'Vernon Fortesque',
 'Waldo Benjamin',
 'Warren Joseph',
 'Webb Joseph',
 'Webster Thomas',
 'Welles Henry',
 'Wendell Oliver',
 'Wheeler Josiah',
 'White Samuel',
 'Whitten John',
 'Whitwell Samuel',
 'Whitwell William',
 'Williams Jeremiah',
 'Williams Jonathan',
 'Williams Thomas',
 'Willis Nathaniel',
 'Wingfield William',
 'Winslow John',
 'Winthrop John',
 'Wyeth Joshua',
 'Young Thomas']
# Map each club name to itself: the {node: label} shape used for labels later
{club: club for club in clubs}
{'Boston Committee': 'Boston Committee',
 'London Enemies': 'London Enemies',
 'Long Room Club': 'Long Room Club',
 'Loyal Nine': 'Loyal Nine',
 'North Caucus': 'North Caucus',
 'St Andrews Lodge': 'St Andrews Lodge',
 'Tea Party': 'Tea Party'}
# How many edges touch the 'Boston Committee' node?
g.degree(nbunch='Boston Committee')
21
# Walk the clubs list and collect each entry (a plain copy of the list)
list(clubs)
['North Caucus',
 'Long Room Club',
 'Boston Committee',
 'London Enemies',
 'St Andrews Lodge',
 'Loyal Nine',
 'Tea Party']
# Ask the graph for the degree of every club, in the same order as `clubs`
list(map(g.degree, clubs))
[59, 17, 21, 62, 53, 10, 97]
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 12))

# 1. Create the graph
# from_pandas_dataframe was renamed to from_pandas_edgelist in the
# networkx 2.x series; the old name no longer exists.
g = nx.from_pandas_edgelist(df, source='name', target='club')

# 2. Create a layout for our nodes
# The same layout dict is passed to every draw call below so that nodes,
# edges and labels all line up.
layout = nx.spring_layout(g, iterations=50)

# 3. Draw the parts we want
# Edges thin and grey
# People small and grey
# Clubs sized according to their number of connections
# Clubs blue
# Labels for clubs ONLY
# People who are highly connected are a highlighted color

# Go through every club name, ask the graph how many
# connections it has. Multiply that by 80 to get the circle size
club_size = [g.degree(club) * 80 for club in clubs]
nx.draw_networkx_nodes(g,
                       layout,
                       nodelist=clubs,
                       node_size=club_size,  # a LIST of sizes, based on g.degree
                       node_color='lightblue')

# Draw every person (clubs were already drawn above)
nx.draw_networkx_nodes(g, layout, nodelist=people, node_color='#cccccc', node_size=100)

# Draw POPULAR PEOPLE (members of more than one club) on top, in orange
popular_people = [person for person in people if g.degree(person) > 1]
nx.draw_networkx_nodes(g, layout, nodelist=popular_people, node_color='orange', node_size=100)

nx.draw_networkx_edges(g, layout, width=1, edge_color="#cccccc")

# Only clubs appear in the labels dict, so only clubs get a text label
node_labels = {club: club for club in clubs}
nx.draw_networkx_labels(g, layout, labels=node_labels)

# 4. Turn off the axis because I know you don't want it
plt.axis('off')

plt.title("Revolutionary Clubs")

# 5. Tell matplotlib to show it
plt.show()

png

Final version

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 12))

# 1. Create the graph
# from_pandas_dataframe was renamed to from_pandas_edgelist in the
# networkx 2.x series; the old name no longer exists.
g = nx.from_pandas_edgelist(df, source='name', target='club')

# 2. Create a layout for our nodes
# The same layout dict is passed to every draw call below so that nodes,
# edges and labels all line up.
layout = nx.spring_layout(g, iterations=50)

# 3. Draw the parts we want
nx.draw_networkx_edges(g, layout, edge_color='#AAAAAA')

# Build the lookup sets once instead of calling .unique() for every node
club_names = set(df['club'].unique())
person_names = set(df['name'].unique())

# Clubs: light blue, sized by how many edges touch them
clubs = [node for node in g.nodes() if node in club_names]
size = [g.degree(club) * 80 for club in clubs]
nx.draw_networkx_nodes(g, layout, nodelist=clubs, node_size=size, node_color='lightblue')

# People: small and grey
people = [node for node in g.nodes() if node in person_names]
nx.draw_networkx_nodes(g, layout, nodelist=people, node_size=100, node_color='#AAAAAA')

# People in more than one club get drawn again on top in a highlight color
high_degree_people = [person for person in people if g.degree(person) > 1]
nx.draw_networkx_nodes(g, layout, nodelist=high_degree_people, node_size=100, node_color='#fc8d62')

# Only clubs appear in the labels dict, so only clubs get a text label
club_dict = {club: club for club in clubs}
nx.draw_networkx_labels(g, layout, labels=club_dict)

# 4. Turn off the axis because I know you don't want it
plt.axis('off')

plt.title("Revolutionary Clubs")

# 5. Tell matplotlib to show it
plt.show()

png