Data Visualization Exercise with Star Wars dataset

star%20wars.png

In this notebook, I practice data visualization with seaborn and Matplotlib. I replicate the Star Wars EDA at https://www.kaggle.com/xvivancos/eda-star-wars-data-set/report, which was written in R, by building similar visualizations in Python. The dataset is from Kaggle: https://www.kaggle.com/jsphyg/star-wars

In [1]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[1]:
In [2]:
import numpy as np
import pandas as pd
# NOTE(review): the star import copies every public pandas name into the notebook
# namespace; the visible cells only go through the `pd` alias.
from pandas import *
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
# NOTE(review): `matplotlib._png` is a private module (leading underscore) and is
# believed to be missing from recent Matplotlib releases — confirm against the
# installed version. `plt.imread` is the public way to load a PNG.
from matplotlib._png import read_png
import seaborn as sns
import warnings
# Suppresses every warning for the rest of the session, including deprecation
# notices raised by the plotting and data cells.
warnings.filterwarnings('ignore')
In [3]:
# Load the starship table; the later starship cells reuse `df2`.
# Forward slashes avoid the invalid `\s` escape of a non-raw backslash literal
# and are accepted on Windows as well.
df2 = pd.read_csv('../star wars data/starships.csv')

# Five lowest-priced starships; keep='last' prefers later rows when prices tie
# at the cut-off.
cheapest_ships = df2.nsmallest(5, 'cost_in_credits', keep='last')

fig, ax = plt.subplots(figsize=(15, 6), dpi=80)
ax.bar(cheapest_ships['name'], cheapest_ships['cost_in_credits'], color='lightblue')

# Customize the chart
ax.set_title('Cheapest Starships', color='black', fontweight='bold', fontsize=15)
ax.xaxis.grid(False)
ax.set_ylabel('Cost in Credits', fontsize=14, color='black')
ax.tick_params(axis='both', labelsize=12)

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSV is read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('Millennium_Falcon.png', 0.09, (0.2, 145000)),
    ('arrow4.png', 0.25, (0.9, 135900)),
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))
In [4]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[4]:
In [5]:
# (Re)load the starship table so this cell can run on its own; later cells reuse `df2`.
# Forward slashes avoid the invalid `\s` escape of a non-raw backslash literal
# and are accepted on Windows as well.
df2 = pd.read_csv('../star wars data/starships.csv')

# Five longest starships; keep='first' prefers earlier rows when lengths tie
# at the cut-off.
longest_ships = df2.nlargest(5, 'length', keep='first')

fig, ax = plt.subplots(figsize=(15, 6), dpi=80)
ax.bar(longest_ships['name'], longest_ships['length'], color='lightblue')

# Customize the chart
ax.set_title('Top 5 Starships by Vehicle Length', color='black', fontweight='bold', fontsize=15)
ax.xaxis.grid(False)
ax.set_ylabel('Length', fontsize=14, color='black')
ax.tick_params(axis='both', labelsize=12)

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSV is read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('stardeath.png', 0.18, (1.10, 85000)),   # Death Star picture
    ('download.png', 0.30, (0.5, 115000)),    # arrow
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))
In [6]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[6]:
In [7]:
# Scatter of starship cost against maximum atmosphering speed (uses `df2` from
# the starship loading cell).
# The style is activated before the figure is created so that it applies to
# this figure as well, not only to figures created afterwards.
plt.style.use('tableau-colorblind10')  # other choices: plt.style.available
fig, ax = plt.subplots(figsize=(13, 7), dpi=80)

sns.scatterplot(data=df2, x='cost_in_credits', y='max_atmosphering_speed',
                marker="o", s=150, ax=ax)

ax.tick_params(axis='both', labelsize=12)
ax.set_xlabel('Cost in Credits', color='#006680', fontsize=15, fontweight='bold')
ax.set_ylabel('Max Atmosphering Speed', color='#006680', fontsize=15, fontweight='bold')
ax.set_title('Cost in Credits against Maximum Speed', color='#006680', fontweight='bold', fontsize=20)

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSVs are read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('jtype.png', 0.25, (55000000, 1750)),      # J-type diplomatic barge
    ('theta.png', 0.40, (109000000, 1800)),     # Theta-class shuttle
    ('download.png', 0.30, (10000000, 1980)),   # arrow
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))

# Starship labels, positioned in data coordinates under the pictures
ax.text(40550000, 1380, 'J-type Diplomatic Barge', fontweight='bold', fontsize=15, color='#1f77b4')
ax.text(95500000, 1380, 'Theta-class T-2C Shuttle', fontweight='bold', fontsize=15, color='#1f77b4');
In [8]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[8]:
In [9]:
# Scatter of starship cost against vehicle length (uses `df2` from the starship
# loading cell).
# The style is activated before the figure is created so that it applies to
# this figure regardless of which cells ran earlier.
plt.style.use('tableau-colorblind10')  # other choices: plt.style.available
fig, ax = plt.subplots(figsize=(13, 7), dpi=80)

sns.scatterplot(data=df2, x='cost_in_credits', y='length', marker="o", s=150, ax=ax)

ax.tick_params(axis='both', labelsize=12)
ax.set_xlabel('Cost in Credits', color='#006680', fontsize=15, fontweight='bold')
ax.set_ylabel('Vehicle Length', color='#006680', fontsize=15, fontweight='bold')
ax.set_title('Cost in Credits against Vehicle Length', color='#006680', fontweight='bold', fontsize=20)

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSVs are read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('stardeath.png', 0.18, (708550000000, 75500)),   # Death Star picture
    ('arrow7.png', 0.15, (856900000000, 110000)),     # arrow
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))

# Starship label, positioned in data coordinates under the picture
ax.text(688550000000, 32500, 'Death Star', fontweight='bold', fontsize=15, color='#1f77b4');
In [10]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[10]:
In [11]:
# Scatter of passenger count against cargo capacity (uses `df2` from the
# starship loading cell).
# The style is activated before the figure is created so that it applies to
# this figure regardless of which cells ran earlier.
plt.style.use('tableau-colorblind10')  # other choices: plt.style.available
fig, ax = plt.subplots(figsize=(13, 7), dpi=80)

sns.scatterplot(data=df2, x='passengers', y='cargo_capacity', marker="o", s=150, ax=ax)

ax.tick_params(axis='both', labelsize=12)
ax.set_xlabel('Number of Passengers', color='#006680', fontsize=15, fontweight='bold')
ax.set_ylabel('Cargo Capacity', color='#006680', fontsize=15, fontweight='bold')
ax.set_title('Number of Passengers against Cargo Capacity', color='#006680', fontweight='bold', fontsize=20)

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSVs are read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('stardeath.png', 0.18, (600000, 650000000000)),   # Death Star picture
    ('arrow7.png', 0.15, (720000, 930000000000)),      # arrow
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))

# Starship label, positioned in data coordinates under the picture
ax.text(590000, 300000000000, 'Death Star', fontweight='bold', fontsize=15, color='#1f77b4');
In [12]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[12]:
In [13]:
# Horizontal bar chart of how many starships fall in each class (uses `df2`
# from the starship loading cell). Counts are sorted ascending so the largest
# class is drawn at the top.
star_class = df2['starship_class'].value_counts().sort_values(ascending=True)

fig, ax = plt.subplots(figsize=(13, 7), dpi=80)
star_class.plot(kind='barh', ax=ax)  # draw on this Axes explicitly
ax.tick_params(axis='both', labelsize=12)
ax.set_title('Count of Starships by Class', color='#006680', fontweight='bold', fontsize=20)

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSVs are read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('starfighters.png', 0.17, (5.8, 13)),   # starfighters picture
    ('arrow7.png', 0.15, (7.8, 21)),         # arrow
]
# Ending the cell on a loop also keeps the AnnotationBbox repr out of the output.
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))
Out[13]:
<matplotlib.offsetbox.AnnotationBbox at 0x1c06e9eed30>
In [14]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[14]:
In [15]:
# Bar chart of the 10 tallest and 10 shortest characters, tallest first.
# Loads the character table; the later character cells reuse `df`.
# Forward slashes avoid the invalid `\s` escape of a non-raw backslash literal
# and are accepted on Windows as well.
df = pd.read_csv('../star wars data/characters.csv')
tallest = df.nlargest(10, 'height', keep='first')
shortest = df.nsmallest(10, 'height', keep='last')
# pd.concat replaces DataFrame.append, which is deprecated since pandas 1.4
# and removed in pandas 2.0.
height = pd.concat([tallest, shortest]).sort_values('height', ascending=False)

# Split the sorted rows by rank instead of a fixed 200 cm cut-off, which would
# drop any character whose height is exactly the threshold.
n_tall = len(tallest)
tall_part = height.iloc[:n_tall]
short_part = height.iloc[n_tall:]

fig, ax = plt.subplots(figsize=(15, 6), dpi=80)
ax.bar(tall_part['name'], tall_part['height'], color='darkolivegreen', width=0.8)
ax.bar(short_part['name'], short_part['height'], color='yellowgreen', width=0.8)

# Customize the chart
ax.set_title('Tallest and shortest characters in Star Wars movies', color='black', fontweight='bold', fontsize=15)
ax.xaxis.grid(False)
ax.set_ylabel('Height (cm)', fontsize=14, color='black')
ax.tick_params(axis='y', labelsize=12, labelrotation=0)
ax.tick_params(axis='x', labelsize=12, labelrotation=90)
# Dashed divider halfway between the last "tall" bar and the first "short" bar.
divider_x = n_tall - 0.5
ax.plot([divider_x, divider_x], [0, 350], linestyle='--', color='red')

ax.text(1.5, 320, 'Top 10 Tallest Characters', fontweight='bold', fontsize=15, color='darkolivegreen')
ax.text(12, 320, 'Top 10 Shortest Characters', fontweight='bold', fontsize=15, color='yellowgreen')

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSV is read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('yoda.png', 0.23, (14.3, 240)),     # Yoda picture
    ('arrow4.png', 0.30, (17.3, 180)),   # arrow
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))

plt.show()
In [16]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[16]:
In [17]:
# Top 10 tallest and top 10 shortest homeworlds by mean character height
# (uses `df` from the character loading cell).
home_height = df[['homeworld', 'height']].groupby('homeworld').mean()
home_tallest = home_height.nlargest(10, 'height', keep='first').reset_index()
home_shortest = home_height.nsmallest(10, 'height', keep='last').reset_index()
# pd.concat replaces DataFrame.append, which is deprecated since pandas 1.4
# and removed in pandas 2.0.
home_height = pd.concat([home_tallest, home_shortest]).sort_values('height', ascending=False)

# Split the sorted rows by rank instead of a fixed 170 cm cut-off, which would
# drop any homeworld whose mean is exactly the threshold.
n_tall = len(home_tallest)
tall_part = home_height.iloc[:n_tall]
short_part = home_height.iloc[n_tall:]

fig, ax = plt.subplots(figsize=(15, 6), dpi=80)
ax.bar(tall_part['homeworld'], tall_part['height'], color='darkolivegreen', width=0.8)
ax.bar(short_part['homeworld'], short_part['height'], color='yellowgreen', width=0.8)

# Customize the chart
ax.set_title('Mean height by homeworld', color='black', fontweight='bold', fontsize=15)
ax.xaxis.grid(False)
ax.set_ylabel('Height (cm)', fontsize=14, color='black')
ax.tick_params(axis='y', labelsize=12, labelrotation=0)
ax.tick_params(axis='x', labelsize=12, labelrotation=90)
# Dashed divider halfway between the last "tall" bar and the first "short" bar.
divider_x = n_tall - 0.5
ax.plot([divider_x, divider_x], [0, 350], linestyle='--', color='red')

ax.text(1.5, 320, 'Top 10 Tallest Homeworlds', fontweight='bold', fontsize=15, color='darkolivegreen')
ax.text(12, 320, 'Top 10 Shortest Homeworlds', fontweight='bold', fontsize=15, color='yellowgreen');
In [18]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[18]:
In [19]:
# Individual weights: the 10 heaviest characters (after dropping the single
# heaviest one) next to the 10 lightest (uses `df` from the character loading cell).
# .assign builds a new frame, so the numeric conversion never writes into a
# slice of `df` (the old chained assignment triggered SettingWithCopyWarning).
masses = df[['name', 'mass']].assign(
    mass=lambda frame: pd.to_numeric(frame['mass'], errors='coerce')  # unparseable -> NaN
)
heaviest = masses.nlargest(11, 'mass', keep='first')
lightest = masses.nsmallest(10, 'mass', keep='last')
# pd.concat replaces DataFrame.append, which is deprecated since pandas 1.4
# and removed in pandas 2.0.
ranked = pd.concat([heaviest, lightest]).sort_values('mass', ascending=False)
# Drop the top row: the chart title states the heaviest character (Jabba the
# Hutt) is excluded.
ranked = ranked.iloc[1:]

# Split by rank instead of a fixed mass of 50, which would drop any character
# weighing exactly the threshold.
n_heavy = len(heaviest) - 1
heavy_part = ranked.iloc[:n_heavy]
light_part = ranked.iloc[n_heavy:]

fig, ax = plt.subplots(figsize=(15, 6), dpi=80)
ax.bar(heavy_part['name'], heavy_part['mass'], color='#532e1c', width=0.8)
ax.bar(light_part['name'], light_part['mass'], color='#c5a880', width=0.8)

# Customize the chart
ax.set_title('Heaviest and least heavy characters in the movies (excluding Jabba the Hutt)', color='black', fontweight='bold', fontsize=15)
ax.xaxis.grid(False)
ax.set_ylabel('Weight', fontsize=14, color='black')
ax.tick_params(axis='y', labelsize=12, labelrotation=0)
ax.tick_params(axis='x', labelsize=12, labelrotation=90)
# Dashed divider halfway between the last "heavy" bar and the first "light" bar.
divider_x = n_heavy - 0.5
ax.plot([divider_x, divider_x], [0, 350], linestyle='--', color='red')
ax.text(1.5, 320, 'Top 10 heaviest characters', fontweight='bold', fontsize=15, color='#532e1c')
ax.text(12, 320, 'Top 10 least heavy characters', fontweight='bold', fontsize=15, color='#c5a880')

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSVs are read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('r2.png', 0.15, (13.5, 175)),       # r2 picture
    ('arrow4.png', 0.15, (14.6, 105)),   # arrow for r2
    ('chewb.png', 0.11, (6.8, 240)),     # chewb picture
    ('arrow5.png', 0.15, (5.6, 180)),    # arrow for chewb
    ('darth.png', 0.20, (2.9, 265)),     # darth picture
    ('arrow5.png', 0.15, (1.6, 204)),    # arrow for darth
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))

plt.show()
In [20]:
# Render a button that shows/hides every notebook input cell (`div.input`) in the page.
# The script registers code_toggle on document-ready; code_show starts true, so that
# first call hides the inputs and flips the flag.
# NOTE(review): this cell is repeated verbatim throughout the notebook, and the script
# relies on jQuery (`$`) being present in the page — confirm for the export target.
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[20]:
In [21]:
# Weight distribution: histogram of character mass (uses `df` from the
# character loading cell).
# Converting the column directly yields a new Series, so nothing is written
# into a slice of `df` (the old chained assignment triggered SettingWithCopyWarning).
mass = pd.to_numeric(df['mass'], errors='coerce')  # unparseable values become NaN

fig, ax = plt.subplots(figsize=(17, 6))
# grid=False replaces the separate calls that switched the grid off afterwards.
mass.hist(bins=20, edgecolor='w', color='#532e1c', grid=False, ax=ax)  # pandas default is bins=10

# Remove the top and right borders of the plot box
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

ax.set_title('Character distribution by weight (literally!)', color='black', fontweight='bold', fontsize=15)
ax.set_ylabel('Count', fontsize=14, color='black')
ax.set_xlabel('Weight', fontsize=14, color='black')
ax.tick_params(axis='y', labelsize=12, labelrotation=0)
ax.tick_params(axis='x', labelsize=12, labelrotation=90)
ax.text(950, 35, r'Jabba the Hutt', fontsize=15)

# Picture overlays: (file name, zoom, anchor in data coordinates), drawn in order.
# NOTE(review): assumes the pics folder sits inside the same '../star wars data'
# directory the CSVs are read from — confirm before relying on it.
pics_dir = '../star wars data/pics'
overlays = [
    ('jabba2.png', 0.06, (1000, 25)),    # Jabba picture
    ('arrow4.png', 0.30, (1230, 12)),    # arrow
]
for file_name, zoom, anchor in overlays:
    # plt.imread is the public PNG loader; it replaces the private
    # matplotlib._png.read_png helper.
    picture = plt.imread(pics_dir + '/' + file_name)
    ax.add_artist(AnnotationBbox(OffsetImage(picture, zoom=zoom), anchor,
                                 xybox=(30., -30.), boxcoords='offset points'))

plt.show()