Created
July 5, 2014 21:57
-
-
Save johnchandlerbaldwin/24b6ebcc3f42af5d52dd to your computer and use it in GitHub Desktop.
My iPython notebook for my Actor Ranking personal project
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#Actor Ranking\n", | |
"\n", | |
"####by John Baldwin" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Introduction\n", | |
"\n", | |
"###This script creates a ranking system of actors. Here's how it works:\n", | |
"\n", | |
"1) We scrape data on box office earnings of movies from boxofficemojo.com.\n", | |
"\n", | |
"2) We tap into the Rotten Tomatoes API and draw a bunch of additional (actors, genres) data about said movies.\n", | |
"\n", | |
"3) We combine and manipulate the data, ultimately producing a ranking system of actors, sortable by movie genre.\n", | |
"\n", | |
"\n", | |
"Let's get started:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Let's start by importing the necessary packages:\n", | |
"\n", | |
"%matplotlib inline\n", | |
"\n", | |
"import json\n", | |
"\n", | |
"import BeautifulSoup\n", | |
"import lxml\n", | |
"import requests\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"import simplejson\n", | |
"import urllib\n", | |
"from rottentomatoes import RT\n", | |
"from pattern import web\n", | |
"import pattern\n", | |
"import psycopg2 as pg\n", | |
"import pandas.io.sql as sql\n", | |
"import datetime\n", | |
"\n", | |
"pd.set_option('display.width', 500)\n", | |
"pd.set_option('display.max_columns', 30)\n", | |
"\n", | |
"# set some nicer defaults for matplotlib\n", | |
"from matplotlib import rcParams\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 81 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Scraping Box Office Mojo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#First, three functions we will use in a larger function below. get_xml reads in the HTML of a web page (we'll use\n", | |
"#this on the ~150 pages of Box Office Mojo's alphabetical listing of movies, pulling in data on ~15000 movies).\n", | |
"#title_split and date_split are functions that eliminate parts the HTML so we can get at the raw data.\n", | |
"\n", | |
"\n", | |
"def get_xml(url):\n", | |
" return(requests.get(url).text\n", | |
"\n", | |
"def title_split(txt):\n", | |
" if \"\"\"><b>\"\"\" in txt:\n", | |
" txt = txt.split(\"\"\"><b>\"\"\" ,1)[1]\n", | |
" return txt\n", | |
" else:\n", | |
" return txt\n", | |
" \n", | |
" \n", | |
"def date_split(txt):\n", | |
" if \"\"\"p=.htm\">\"\"\" in txt:\n", | |
" txt = txt.split(\"\"\"p=.htm\">\"\"\" ,1)[1]\n", | |
" return txt\n", | |
" else:\n", | |
" return txt\n", | |
" " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#bom_page combines the functions above and then some, pulling HTML text data for a given web page and cleaning it\n", | |
"#into the format we are looking for.\n", | |
"\n", | |
"def bom_page(url):\n", | |
" bom_xml = get_xml(url)\n", | |
" \n", | |
" dom = web.Element(bom_xml)\n", | |
" \n", | |
" bom_list = []\n", | |
" for i in dom.by_tag('font'):\n", | |
" bom_list.append(str(i))\n", | |
" \n", | |
" #text cleaning\n", | |
" z = [x for x in bom_list if '<font size=\"2\">' in x]\n", | |
" z = [title_split(x) for x in z]\n", | |
" z = [date_split(x) for x in z]\n", | |
" z = [x.replace('<font size=\"2\">','').replace('font>','') for x in z]\n", | |
" z = [x.replace('</b></a></', '').replace('</a></', '').replace('</','') for x in z]\n", | |
" \n", | |
" composite_z = [z[x:x+7] for x in range(0, len(z),7)]\n", | |
" \n", | |
" return pd.DataFrame(composite_z)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#We'll now collect all the URLs of the ~150 web pages we plan to scrape and combine them into a dataframe for later use.\n", | |
"\n", | |
"\n", | |
"bom_df = pd.DataFrame()\n", | |
"\n", | |
"page_letters = ['NUM','A','B','C','D','E','F','G','H','I','J','K',\n", | |
" 'L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']\n", | |
"page_numbers = ['1','2','3','4','5','6','7','8','9','10','11','12','13']\n", | |
"\n", | |
"bom_urls = []\n", | |
"\n", | |
"for i in page_letters:\n", | |
" for j in page_numbers:\n", | |
" bom_urls.append(\"\"\"http://www.boxofficemojo.com/movies/alphabetical.htm?letter=\"\"\" + i + \"\"\"&page=\"\"\" + j + \"\"\"&p=.htm\"\"\")\n", | |
" \n", | |
" " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Now we run the bom_page function defined above on the URLs we collected, generating the dataframe we need.\n", | |
"\n", | |
"bom_df = pd.DataFrame()\n", | |
"\n", | |
"for url in bom_urls:\n", | |
" bom_df = pd.concat([bom_df,bom_page(url)])\n", | |
" " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Drop duplicates, reset index, set column titles\n", | |
"\n", | |
"bom_df = bom_df.drop_duplicates()\n", | |
"bom_df = bom_df.reset_index(drop=True)\n", | |
"\n", | |
"bom_df.columns=('title','studio','domestic_gross',\n", | |
" 'domestic_theatres','open_gross','open_theatres','date')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Define a new data frame for further use, preserving the previously generated bom_df object\n", | |
"\n", | |
"boxoffice_df = bom_df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Further cleaning, making numerical data interpretable\n", | |
"\n", | |
"boxoffice_df[['domestic_gross','domestic_theatres','open_gross',\n", | |
" 'open_theatres']] = (boxoffice_df[['domestic_gross','domestic_theatres','open_gross',\n", | |
" 'open_theatres']].applymap(lambda x: float(x.replace('$','')\n", | |
" .replace(',','').replace('n/a','0').replace('*','0'))))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Reduce each date item to the last 4 characters (which should be the year, if it exists)\n", | |
"\n", | |
"boxoffice_df['date'] = boxoffice_df['date'].apply(lambda x: x[-4:])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Find 4-number sequence (or range of years) in a given date\n", | |
"#If it exists, that is now the date\n", | |
"\n", | |
"for i in range(0,len(boxoffice_df['date'])):\n", | |
" try:\n", | |
" boxoffice_df['date'][i] = datetime.datetime.strptime(boxoffice_df['date'][i], '%Y').year\n", | |
" except:\n", | |
" boxoffice_df = boxoffice_df.drop(i)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#A few further clerical things:\n", | |
"\n", | |
"boxoffice_df['title'] = boxoffice_df['title'].apply(lambda x: x.replace('&','&'))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#The dataframe is prepared. Now I'm going to subset it to only films since 2008 and only ones that grossed more than\n", | |
"#$1,000,000, giving us only financially relevant movies from the last 5 years.\n", | |
"\n", | |
"bodf_clean = boxoffice_df[(boxoffice_df['date'] >= 2008) & (boxoffice_df['domestic_gross'] > 1000000)]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 17 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Generating Data From the Rotten Tomatoes API" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Read in a list of the titles you generated from scraping Box Office Mojo\n", | |
"\n", | |
"titles_list = bodf_clean['title']\n", | |
"\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 85 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Create an empty dataframe to hold the Rotten Tomatoes data\n", | |
"\n", | |
"\n", | |
"tomatoes_df = pd.DataFrame(np.array([np.arange(len(titles_list))]*16).T, columns=['title_original','title','year','audience_score',\n", | |
" 'critics_score','critics_rating',\n", | |
" 'actor_1','actor_2','actor_3',\n", | |
" 'actor_4','actor_5','genre_1',\n", | |
" 'genre_2','genre_3','genre_4',\n", | |
" 'genre_5'])\n", | |
"\n", | |
"\n", | |
"#Setting the data types:\n", | |
"tomatoes_df[['title_original','title','critics_rating','actor_1',\n", | |
" 'actor_2','actor_3','actor_4',\n", | |
" 'actor_5','genre_1','genre_2',\n", | |
" 'genre_3','genre_4','genre_5']] = tomatoes_df[['title_original','title','critics_rating',\n", | |
" 'actor_1','actor_2','actor_3',\n", | |
" 'actor_4','actor_5','genre_1',\n", | |
" 'genre_2','genre_3','genre_4',\n", | |
" 'genre_5']].astype(str)\n", | |
"\n", | |
"tomatoes_df[['year','audience_score','critics_score']] = tomatoes_df[['year','audience_score',\n", | |
" 'critics_score']].astype(float)\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 86 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Rotten Tomatoes API parameters (You'll have to register for their API and generate your own API key if you\n", | |
"#want to do this one at home, folks)\n", | |
"\n", | |
"api_key = '************************'\n", | |
"options = {'apikey': api_key}\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 87 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Creates the function to query the Rotten Tomatoes' API and add data about a movie into the data frame.\n", | |
"\n", | |
"\n", | |
"def add_movie(movie_title):\n", | |
" \n", | |
" #Get initial info about movie\n", | |
" movie = RT(api_key).search(movie_title)\n", | |
" \n", | |
" #Query links.self attribute to gain more info\n", | |
" movie = requests.get(str(movie[0]['links']['self']), params=options).json()\n", | |
" \n", | |
" \n", | |
" movie_elements = [str(movie_title),\n", | |
" str(movie['title']),\n", | |
" float(movie['year']),\n", | |
" float(movie['ratings']['audience_score']),\n", | |
" float(movie['ratings']['critics_score']),\n", | |
" str(movie['ratings']['critics_rating'])\n", | |
" ]\n", | |
" \n", | |
" #Loads 5 actors into the data frame.\n", | |
" movie_elements[6:11] = ['None','None','None','None','None']\n", | |
"\n", | |
" for i in range(0,len(movie['abridged_cast'])):\n", | |
" movie_elements[i+6] = str(movie['abridged_cast'][i].get('name'))\n", | |
" \n", | |
" #Loads x genres into the data frame.\n", | |
" movie_elements[12:16] = ['None','None','None','None','None']\n", | |
" \n", | |
" for i in range(0,len(movie['genres'])):\n", | |
" movie_elements[i+11] = str(movie['genres'][i])\n", | |
" \n", | |
" return movie_elements" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 88 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Iterates through the list of titles, running the above add_movie function.\n", | |
"#NOTE: THIS STEP WILL TAKE A WHILE\n", | |
"\n", | |
"for i in range(0,len(titles_list.values)):\n", | |
" try:\n", | |
" tomatoes_df.ix[i] = add_movie(titles_list.values[i])\n", | |
" except:\n", | |
" pass" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 89 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Combining the Box Office Mojo and Rotten Tomatoes Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Combine the two dataframes on the original (Box Office Mojo) titles:\n", | |
"\n", | |
"movie_df = tomatoes_df.merge(bodf_clean, left_on='title_original', right_on=\"title\", how='left')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 108 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Data Manipulation Part 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Creates an array of all unique actors\n", | |
"\n", | |
"actors = pd.concat([movie_df['actor_1'],movie_df['actor_2'],movie_df['actor_3'],\n", | |
" movie_df['actor_4'],movie_df['actor_5']])\n", | |
"\n", | |
"actors = pd.Series(actors.values.ravel()).unique()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 109 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#This takes an actor and generates a dataframe with actors' name on the left and each movie they're\n", | |
"#in and the associated info as rows, and then concatenates it onto an outside dataframe holding the rest.\n", | |
"\n", | |
"def add_films(actor):\n", | |
" actor_rows = pd.concat([movie_df[movie_df['actor_1'] == actor],\n", | |
" movie_df[movie_df['actor_2'] == actor],\n", | |
" movie_df[movie_df['actor_3'] == actor],\n", | |
" movie_df[movie_df['actor_4'] == actor],\n", | |
" movie_df[movie_df['actor_5'] == actor]]\n", | |
" )\n", | |
" \n", | |
" actor_rows.insert(0, 'actor_index', actor)\n", | |
" \n", | |
" return actor_rows" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 110 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#We run this, and we get the combined data frame actors_movies.\n", | |
"\n", | |
"actors_movies = []\n", | |
"for actor in actors:\n", | |
" actors_movies.append(add_films(actor))\n", | |
" \n", | |
"actors_movies = pd.concat(actors_movies)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 112 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Join this correctly\n", | |
"\n", | |
"#actors_movies = actors_movies.merge(social_media_df, on='actor_index', how='left')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 33 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Choosing the Genre" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Using this option, we can subset the actor ranking to only look at actors in films of certain genres.\n", | |
"\n", | |
"actors_movies = pd.concat([actors_movies[actors_movies['genre_1'] == 'Comedy'],\n", | |
" actors_movies[actors_movies['genre_2'] == 'Comedy'],\n", | |
" actors_movies[actors_movies['genre_3'] == 'Comedy'],\n", | |
" actors_movies[actors_movies['genre_4'] == 'Comedy'],\n", | |
" actors_movies[actors_movies['genre_5'] == 'Comedy']]\n", | |
" )" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 113 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Data Manipulation Part 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Dataframe actors_insights will hold our final insights; we'll use it below.\n", | |
"\n", | |
"actors_insights = pd.DataFrame(actors, columns=['actor_index'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 121 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Okay, so here we're going to take a sum of the domestic gross and Rotten Tomatoes scores for\n", | |
"#every movie that every actor has been in.\n", | |
"\n", | |
"col = ['domestic_gross','audience_score','critics_score']\n", | |
"\n", | |
"\n", | |
"for c in col:\n", | |
"\n", | |
" l = []\n", | |
" \n", | |
" for actor in actors_insights['actor_index']:\n", | |
" try:\n", | |
" l.append(sum(actors_movies[actors_movies['actor_index'] == actor][c]))\n", | |
" except:\n", | |
" pass\n", | |
" \n", | |
" actors_insights[c] = l" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 123 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Replace all NaN with 0 for ranking purposes\n", | |
"\n", | |
"actors_insights = actors_insights.fillna(0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 124 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Change values to rankings to compare between actors:\n", | |
"\n", | |
"actors_insights['domestic_gross_rank'] = actors_insights['domestic_gross'].rank()\n", | |
"actors_insights['audience_score_rank'] = actors_insights['audience_score'].rank()\n", | |
"actors_insights['critics_score_rank'] = actors_insights['critics_score'].rank()\n", | |
"\n", | |
"actors_insights['averaged_ranks'] = (actors_insights['domestic_gross_rank'] +\n", | |
" actors_insights['audience_score_rank'] +\n", | |
" actors_insights['critics_score_rank'] / 3)\n", | |
"\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 125 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#And here we have the result. The top 10 actors in comedy films since 2009 (as ranked by my system):\n", | |
"\n", | |
"actors_insights.sort('averaged_ranks', ascending=False)[:10]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>actor_index</th>\n", | |
" <th>domestic_gross</th>\n", | |
" <th>audience_score</th>\n", | |
" <th>critics_score</th>\n", | |
" <th>domestic_gross_rank</th>\n", | |
" <th>audience_score_rank</th>\n", | |
" <th>critics_score_rank</th>\n", | |
" <th>averaged_ranks</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>238</th>\n", | |
" <td> Steve Carell</td>\n", | |
" <td> 2394169642</td>\n", | |
" <td> 750</td>\n", | |
" <td> 787</td>\n", | |
" <td> 3201</td>\n", | |
" <td> 3204.0</td>\n", | |
" <td> 3205</td>\n", | |
" <td> 7473.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>61 </th>\n", | |
" <td> Will Ferrell</td>\n", | |
" <td> 2077108439</td>\n", | |
" <td> 608</td>\n", | |
" <td> 682</td>\n", | |
" <td> 3200</td>\n", | |
" <td> 3201.0</td>\n", | |
" <td> 3201</td>\n", | |
" <td> 7468.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19 </th>\n", | |
" <td> Jonah Hill</td>\n", | |
" <td> 995021485</td>\n", | |
" <td> 760</td>\n", | |
" <td> 781</td>\n", | |
" <td> 3193</td>\n", | |
" <td> 3205.0</td>\n", | |
" <td> 3204</td>\n", | |
" <td> 7466.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>415</th>\n", | |
" <td> Paul Rudd</td>\n", | |
" <td> 1650882453</td>\n", | |
" <td> 545</td>\n", | |
" <td> 615</td>\n", | |
" <td> 3199</td>\n", | |
" <td> 3198.0</td>\n", | |
" <td> 3200</td>\n", | |
" <td> 7463.666667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>153</th>\n", | |
" <td> Kristen Wiig</td>\n", | |
" <td> 940018977</td>\n", | |
" <td> 635</td>\n", | |
" <td> 710</td>\n", | |
" <td> 3190</td>\n", | |
" <td> 3203.0</td>\n", | |
" <td> 3202</td>\n", | |
" <td> 7460.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>889</th>\n", | |
" <td> Zach Galifianakis</td>\n", | |
" <td> 1075365424</td>\n", | |
" <td> 514</td>\n", | |
" <td> 487</td>\n", | |
" <td> 3196</td>\n", | |
" <td> 3197.0</td>\n", | |
" <td> 3196</td>\n", | |
" <td> 7458.333333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>373</th>\n", | |
" <td> Ben Stiller</td>\n", | |
" <td> 1008470644</td>\n", | |
" <td> 494</td>\n", | |
" <td> 491</td>\n", | |
" <td> 3195</td>\n", | |
" <td> 3194.5</td>\n", | |
" <td> 3197</td>\n", | |
" <td> 7455.166667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>175</th>\n", | |
" <td> Owen Wilson</td>\n", | |
" <td> 966513141</td>\n", | |
" <td> 580</td>\n", | |
" <td> 442</td>\n", | |
" <td> 3191</td>\n", | |
" <td> 3200.0</td>\n", | |
" <td> 3192</td>\n", | |
" <td> 7455.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>91 </th>\n", | |
" <td> Jason Bateman</td>\n", | |
" <td> 793896697</td>\n", | |
" <td> 562</td>\n", | |
" <td> 505</td>\n", | |
" <td> 3187</td>\n", | |
" <td> 3199.0</td>\n", | |
" <td> 3198</td>\n", | |
" <td> 7452.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>379</th>\n", | |
" <td> Bradley Cooper</td>\n", | |
" <td> 998605116</td>\n", | |
" <td> 479</td>\n", | |
" <td> 372</td>\n", | |
" <td> 3194</td>\n", | |
" <td> 3193.0</td>\n", | |
" <td> 3181</td>\n", | |
" <td> 7447.333333</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 132, | |
"text": [ | |
" actor_index domestic_gross audience_score critics_score domestic_gross_rank audience_score_rank critics_score_rank averaged_ranks\n", | |
"238 Steve Carell 2394169642 750 787 3201 3204.0 3205 7473.333333\n", | |
"61 Will Ferrell 2077108439 608 682 3200 3201.0 3201 7468.000000\n", | |
"19 Jonah Hill 995021485 760 781 3193 3205.0 3204 7466.000000\n", | |
"415 Paul Rudd 1650882453 545 615 3199 3198.0 3200 7463.666667\n", | |
"153 Kristen Wiig 940018977 635 710 3190 3203.0 3202 7460.333333\n", | |
"889 Zach Galifianakis 1075365424 514 487 3196 3197.0 3196 7458.333333\n", | |
"373 Ben Stiller 1008470644 494 491 3195 3194.5 3197 7455.166667\n", | |
"175 Owen Wilson 966513141 580 442 3191 3200.0 3192 7455.000000\n", | |
"91 Jason Bateman 793896697 562 505 3187 3199.0 3198 7452.000000\n", | |
"379 Bradley Cooper 998605116 479 372 3194 3193.0 3181 7447.333333" | |
] | |
} | |
], | |
"prompt_number": 132 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Results:" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Top 10 Actors in Comedic Films, 2009-2014:\n", | |
"\n", | |
" 1) Steve Carell\n", | |
" 2) Will Ferrell\n", | |
" 3) Jonah Hill\n", | |
" 4) Paul Rudd\n", | |
" 5) Kristen Wiig\n", | |
" 6) Zach Galifianakis\n", | |
" 7) Ben Stiller\n", | |
" 8) Owen Wilson\n", | |
" 9) Jason Bateman\n", | |
" 10) Bradley Cooper\n", | |
" \n", | |
"Thanks for checking out my script! I hope you found it interesting. All the best. \n", | |
"\n", | |
"John" | |
] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment