Module dataMovies.serializers.imdbSerializers
Expand source code
#!/usr/bin/env python3
import pandas as pd
from .utils.dictionaryTransform import nested_dict_to_list
def imdb_reviews_to_df(reviews):
    """Transform reviews to pandas.DataFrame() format.

    Args:
        reviews: a python dictionary corresponding to reviews. It must hold
            the list of review records under 'items' plus the title-level
            keys 'imDbId', 'title', 'fullTitle', 'type' and 'year'.

    Returns:
        A pandas.DataFrame() of the reviews: one row per entry of 'items'
        (those columns are prefixed with 'review_'), with the title-level
        keys repeated on every row. An empty pandas.DataFrame() is returned
        when reviews is not a dictionary or when an expected key is missing.
    """
    record_path = 'items'
    meta = ['imDbId', 'title', 'fullTitle', 'type', 'year']

    # A failed or malformed payload (e.g. None) gets the same empty result as
    # a payload with missing keys, instead of a TypeError from the `in` test.
    if not isinstance(reviews, dict):
        return pd.DataFrame()
    if not all(key in reviews for key in [record_path, *meta]):
        return pd.DataFrame()

    return pd.json_normalize(
        reviews,
        record_path,
        meta,
        record_prefix='review_',
    )
def imdb_users_ratings_to_df(users_ratings, stat='ratings'):
    """Transform users ratings to pandas.DataFrame() format.

    Args:
        users_ratings: a python dictionary corresponding to users_ratings
        stat (optional): string from {'ratings', 'demographicMales',
            'demographicFemales', 'demographicAll'} (e.g. demographicAll).
            'ratings' (default): shows the rating notes distribution among
                all voters.
            'demographicMales': shows ratings per age range for males.
            'demographicFemales': shows ratings per age range for females.
            'demographicAll': shows ratings per age range for males and
                females.

    Returns:
        A pandas.DataFrame() of the users_ratings, built from the records
        stored under the `stat` key (columns prefixed with 'review_') with
        the title-level keys repeated on every row. An empty
        pandas.DataFrame() is returned when users_ratings is not a
        dictionary or when an expected key (including `stat`) is missing.
    """
    meta = ['imDbId', 'title', 'fullTitle', 'type', 'year', 'totalRating', 'totalRatingVotes']
    demographics = ['demographicAll', 'demographicMales', 'demographicFemales']
    required = meta + ['ratings'] + demographics + [stat]

    # A failed or malformed payload (e.g. None) gets the same empty result as
    # a payload with missing keys, instead of a TypeError from the `in` test.
    if not isinstance(users_ratings, dict):
        return pd.DataFrame()
    if not all(key in users_ratings for key in required):
        return pd.DataFrame()

    # NOTE(review): nested_dict_to_list presumably turns each per-age-range
    # dictionary into a list of records labelled by 'ageRange' so that
    # json_normalize can use it as a record path -- confirm against
    # utils.dictionaryTransform.
    for key in demographics:
        users_ratings = nested_dict_to_list(users_ratings, key, 'ageRange')

    return pd.json_normalize(
        users_ratings,
        stat,
        meta,
        record_prefix='review_',
    )
Functions
def imdb_reviews_to_df(reviews)
-
Transform reviews to pandas.DataFrame() format.
Args
reviews
- a python dictionary corresponding to reviews
Returns
A pandas.DataFrame() of the reviews.
Expand source code
def imdb_reviews_to_df(reviews):
    """Transform reviews to pandas.DataFrame() format.

    Args:
        reviews: a python dictionary corresponding to reviews. It must hold
            the list of review records under 'items' plus the title-level
            keys 'imDbId', 'title', 'fullTitle', 'type' and 'year'.

    Returns:
        A pandas.DataFrame() of the reviews: one row per entry of 'items'
        (those columns are prefixed with 'review_'), with the title-level
        keys repeated on every row. An empty pandas.DataFrame() is returned
        when reviews is not a dictionary or when an expected key is missing.
    """
    record_path = 'items'
    meta = ['imDbId', 'title', 'fullTitle', 'type', 'year']

    # A failed or malformed payload (e.g. None) gets the same empty result as
    # a payload with missing keys, instead of a TypeError from the `in` test.
    if not isinstance(reviews, dict):
        return pd.DataFrame()
    if not all(key in reviews for key in [record_path, *meta]):
        return pd.DataFrame()

    return pd.json_normalize(
        reviews,
        record_path,
        meta,
        record_prefix='review_',
    )
def imdb_users_ratings_to_df(users_ratings, stat)
-
Transform users ratings to pandas.DataFrame() format.
Args
users_ratings
- a python dictionary corresponding to users_ratings
stat
:optional
- string from {'ratings', 'demographicMales', 'demographicFemales', 'demographicAll'} (e.g. demographicAll). 'ratings' (default): shows the rating notes distribution among all voters. 'demographicMales': shows ratings per age range for males. 'demographicFemales': shows ratings per age range for females. 'demographicAll': shows ratings per age range for males and females.
Returns
A pandas.DataFrame() of the users_ratings.
Expand source code
def imdb_users_ratings_to_df(users_ratings, stat='ratings'):
    """Transform users ratings to pandas.DataFrame() format.

    Args:
        users_ratings: a python dictionary corresponding to users_ratings
        stat (optional): string from {'ratings', 'demographicMales',
            'demographicFemales', 'demographicAll'} (e.g. demographicAll).
            'ratings' (default): shows the rating notes distribution among
                all voters.
            'demographicMales': shows ratings per age range for males.
            'demographicFemales': shows ratings per age range for females.
            'demographicAll': shows ratings per age range for males and
                females.

    Returns:
        A pandas.DataFrame() of the users_ratings, built from the records
        stored under the `stat` key (columns prefixed with 'review_') with
        the title-level keys repeated on every row. An empty
        pandas.DataFrame() is returned when users_ratings is not a
        dictionary or when an expected key (including `stat`) is missing.
    """
    meta = ['imDbId', 'title', 'fullTitle', 'type', 'year', 'totalRating', 'totalRatingVotes']
    demographics = ['demographicAll', 'demographicMales', 'demographicFemales']
    required = meta + ['ratings'] + demographics + [stat]

    # A failed or malformed payload (e.g. None) gets the same empty result as
    # a payload with missing keys, instead of a TypeError from the `in` test.
    if not isinstance(users_ratings, dict):
        return pd.DataFrame()
    if not all(key in users_ratings for key in required):
        return pd.DataFrame()

    # NOTE(review): nested_dict_to_list presumably turns each per-age-range
    # dictionary into a list of records labelled by 'ageRange' so that
    # json_normalize can use it as a record path -- confirm against
    # utils.dictionaryTransform.
    for key in demographics:
        users_ratings = nested_dict_to_list(users_ratings, key, 'ageRange')

    return pd.json_normalize(
        users_ratings,
        stat,
        meta,
        record_prefix='review_',
    )