Your data can be formatted in a few different ways, chief among them wide versus long.
Wide data looks like this:
State | 1960 | 1970 | 1980 | 1990 | 2000 |
---|---|---|---|---|---|
New York | 2 | 5 | 2 | 5 | 4 |
New Jersey | 3 | 1 | 4 | 1 | 5 |
Arizona | 3 | 9 | 8 | 7 | 5 |
While long data looks like this:
State | Year | Value |
---|---|---|
New York | 1960 | 2 |
New York | 1970 | 5 |
New York | 1980 | 2 |
New York | 1990 | 5 |
New York | 2000 | 4 |
New Jersey | 1960 | 3 |
New Jersey | 1970 | 1 |
New Jersey | 1980 | 4 |
… | … | … |
Which do you want? Depends on the application! I can think of exactly zero examples at the moment.
While you could convert between the two in pandas without too much work, I like to use the original data files in my work whenever I can. Not only does it decrease the number of mistakes you might make, but it allows you to easily update to a new release (of the census, for example) by just dropping in a new csv. Converting in d3 isn’t too tough, either.
Usually you can use just part of these methods to get to where you’re going, but I’m going full-out just in case.
Your data, which we’ll call long.csv
State | Year | Value |
---|---|---|
New York | 1960 | 2 |
New York | 1970 | 5 |
New Jersey | 1960 | 3 |
New Jersey | 1970 | 1 |
Arizona | 1960 | 7 |
Arizona | 1970 | 2 |
… | … | … |
Your data would look like
[
{ "State": "New York", "Year": 1960, "Value": 2 },
{ "State": "New York", "Year": 1970, "Value": 5 },
{ "State": "New Jersey", "Year": 1960, "Value": 3 },
// etc
]
d3.map()
queue
// Long -> wide: builds one wide row per state while long.csv is being parsed.
// state_map ends up keyed by state name, each value shaped like
// { "State": "New York", "1960": 2, "1970": 5 }.
var state_map = d3.map();
queue()
  .defer(d3.csv, "../long.csv", function(row) {
    // Try to get the state's wide row; if it doesn't exist yet, start a new one
    var datapoint = state_map.get(row["State"]) || {"State": row["State"]};
    // row["Year"] becomes, say, "1960", and row["Value"] is "2", so really it's
    // datapoint["1960"] = 2;
    // d3.csv hands every field over as a string, so coerce the value to a number.
    datapoint[row["Year"]] = +row["Value"];
    // Store the row back in the map. Without this, a freshly created datapoint
    // is thrown away on every call and state_map stays empty.
    state_map.set(row["State"], datapoint);
    // return the unadulterated row so it still gets passed along to ready
    return row;
  })
  .await(ready);
Then, later…
// Look up the wide row for a single state by name
state_map.get("New York");
// { "State": "New York", "1960": 2, "1970": 5 }
// Or grab every wide row as a plain array
state_map.values();
// [
// { "State": "New York", "1960": 2, "1970": 5 },
// { "State": "New Jersey", "1960": 3, "1970": 1 },
// { "State": "Arizona", "1960": 7, "1970": 2 },
// ...
// ]
d3.map()
d3.nest()
// Long -> wide using d3.nest: group the long rows by state, then collapse
// each group into a single object with one property per year.
d3.csv("../long.csv", function(error, long_data) {
  // long_data is undefined when the load fails, so surface the real error
  // instead of crashing inside d3.nest() below.
  if (error) throw error;
  // long_data looks like (d3.csv leaves every field as a string)
  // [
  //   { "State": "New York", "Year": "1960", "Value": "2" },
  //   { "State": "New York", "Year": "1970", "Value": "5" },
  //   { "State": "New Jersey", "Year": "1960", "Value": "3" },
  //   ... etc
  // ]
  var wide = d3.nest()
    .key(function(d) { return d["State"]; }) // group rows by state
    .rollup(function(rows) { // do this to each grouping
      // reduce takes a list and returns one value
      // in this case, the list is all the rows for one state
      // and the final value is an object with one key per year
      return rows.reduce(function(prev, curr) {
        prev["State"] = curr["State"];
        // coerce so the wide row holds numbers rather than strings
        prev[curr["Year"]] = +curr["Value"];
        return prev;
      }, {});
    })
    .entries(long_data) // tell it what data to process
    .map(function(d) { // unwrap { key, values } down to just the rolled-up row
      return d.values;
    });
  // do magic with wide down here
});
wide
would look like
[
{ "State": "New York", "1960": 2, "1970": 5 },
{ "State": "New Jersey", "1960": 3, "1970": 1 },
{ "State": "Arizona", "1960": 7, "1970": 2 },
// etc
]
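If you left off the last .map()
section, wide
would still be an array, but each element would be the key/values pair that .entries() produces rather than the bare row:
[
{ "key": "New York", "values": { "State": "New York", "1960": 2, "1970": 5 } },
{ "key": "New Jersey", "values": { "State": "New Jersey", "1960": 3, "1970": 1 } },
{ "key": "Arizona", "values": { "State": "Arizona", "1960": 7, "1970": 2 } },
// etc
]
If you would rather have an object keyed by state name, call .map(long_data) on the nest in place of .entries(long_data), and wide would look like this instead:
{
"New York": { "State": "New York", "1960": 2, "1970": 5 },
"New Jersey": { "State": "New Jersey", "1960": 3, "1970": 1 },
"Arizona": { "State": "Arizona", "1960": 7, "1970": 2 },
// etc
}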
You can find out more about .reduce
over here
Your data, which we’ll call wide.csv
State | 1960 | 1970 | 1980 | 1990 | 2000 |
---|---|---|---|---|---|
New York | 2 | 5 | 2 | 5 | 4 |
New Jersey | 3 | 1 | 4 | 1 | 5 |
Arizona | 3 | 9 | 8 | 7 | 5 |
Your data would look like
[
{ "State": "New York", "1960": 2, "1970": 5 },
{ "State": "New Jersey", "1960": 3, "1970": 1 },
{ "State": "Arizona", "1960": 3, "1970": 9 },
...
]
queue
ready
JavaScript
// Wide -> long: collects one { State, Year, Value } record per year column
// of every row while wide.csv is being parsed.
var long_data = [];
queue()
  .defer(d3.csv, "wide.csv", function(row) {
    // Loop through all of the columns, and for each column
    // make a new long row
    Object.keys(row).forEach(function(colname) {
      // "State" labels the row instead of holding a yearly value; "Value" is
      // skipped too in case the file already carries a long-style column.
      if (colname === "State" || colname === "Value") {
        return;
      }
      // d3.csv yields strings for both headers and cells, so coerce the year
      // and the value to numbers.
      long_data.push({"State": row["State"], "Value": +row[colname], "Year": +colname});
    });
    // hand the untouched wide row on to ready
    return row;
  })
  .await(ready);
Then, later, long_data
would look like this:
[
{ "State": "New York", "Year": 1960, "Value": 2 },
{ "State": "New York", "Year": 1970, "Value": 5 },
{ "State": "New Jersey", "Year": 1960, "Value": 3 },
// etc
]
This is honestly the same thing as Method A just wrapped a little differently.
queue
JavaScript
// Wide -> long without queue: load wide.csv, then expand every row into one
// { State, Year, Value } record per year column.
// d3.csv takes the URL first (passing d3.csv itself is only for queue's defer),
// and its callback receives (error, rows).
d3.csv("../wide.csv", function(error, wide_data) {
  // wide_data is undefined when the load fails, so surface the real error
  if (error) throw error;
  var long_data = [];
  wide_data.forEach(function(row) {
    // Loop through all of the columns, and for each column
    // make a new long row
    Object.keys(row).forEach(function(colname) {
      // "State" labels the row instead of holding a yearly value; "Value" is
      // skipped too in case the file already carries a long-style column.
      if (colname === "State" || colname === "Value") {
        return;
      }
      // d3.csv yields strings for both headers and cells, so coerce the year
      // and the value to numbers.
      long_data.push({"State": row["State"], "Value": +row[colname], "Year": +colname});
    });
  });
  // do magic with long_data down here
});
Then, later, long_data
would look like this:
[
{ "State": "New York", "Year": 1960, "Value": 2 },
{ "State": "New York", "Year": 1970, "Value": 5 },
{ "State": "New Jersey", "Year": 1960, "Value": 3 },
// etc
]