Reference
Naming Conventions
The extraction of year and title from an input string relies on a regex pattern. Multiple patterns are already provided.
If you think a pattern is missing, feel free to open a pull request!
| Naming Convention # | Input |
|---|---|
| 0 | |
| 1 | |
Tip
Click the link in the second column to get a visual representation of the pattern. On that page you can also test whether your own examples match!
Schemas
The columns of every dataframe are cast to the following types:
def _get_schema(self, schema: str) -> Dict[str, object]:
    """Look up the column-type mapping registered under a schema name.

    Args:
        schema: name of the schema to look up.

    Returns:
        Dictionary mapping every column name of the schema to its type,
        given either as a Python type or as a string such as "category".

    Raises:
        KeyError: if ``schema`` is not one of the known schema names.
    """
    # One entry per schema name; the insertion order of the columns inside
    # each mapping is kept exactly as listed.
    known_schemas = {
        "canon_input": {
            "year": int,
            "title": str,
            "tmdb_id_first_pass": int,
            "tmdb_id_second_pass": int,
            "tmdb_id": int,
        },
        "cast": {
            "tmdb_id": int,
            "cast.adult": bool,
            "cast.gender": int,
            "cast.id": int,
            "cast.known_for_department": "category",
            "cast.name": str,
            "cast.original_name": str,
            "cast.popularity": float,
            "cast.profile_path": str,
            "cast.cast_id": int,
            "cast.character": str,
            "cast.credit_id": str,
            "cast.order": int,
        },
        "crew": {
            "tmdb_id": int,
            "crew.adult": bool,
            "crew.gender": int,
            "crew.id": int,
            "crew.known_for_department": "category",
            "crew.name": str,
            "crew.original_name": str,
            "crew.popularity": float,
            "crew.profile_path": str,
            "crew.credit_id": str,
            "crew.department": "category",
            "crew.job": str,
        },
        "belongs_to_collection": {
            "tmdb_id": int,
            "belongs_to_collection.id": int,
            "belongs_to_collection.name": str,
            "belongs_to_collection.poster_path": str,
            "belongs_to_collection.backdrop_path": str,
        },
        "genres": {
            "tmdb_id": int,
            "genres.id": int,
            "genres.name": str,
        },
        "production_companies": {
            "tmdb_id": int,
            "production_companies.id": int,
            "production_companies.logo_path": str,
            "production_companies.name": "category",
            "production_companies.origin_country": "category",
        },
        "production_countries": {
            "tmdb_id": int,
            "production_countries.iso_3166_1": "category",
            "production_countries.name": str,
        },
        "spoken_languages": {
            "tmdb_id": int,
            "spoken_languages.english_name": "category",
            "spoken_languages.iso_639_1": "category",
            "spoken_languages.name": str,
        },
        "movie_details": {
            "tmdb_id": int,
            "adult": bool,
            "backdrop_path": str,
            "budget": int,
            "homepage": str,
            "imdb_id": str,
            "original_language": "category",
            "original_title": str,
            "overview": str,
            "popularity": float,
            "poster_path": str,
            "release_date": "datetime64[ns]",
            "revenue": int,
            "runtime": int,
            "status": "category",
            "tagline": str,
            "title": str,
            "video": bool,
            "vote_average": float,
            "vote_count": int,
        },
    }

    try:
        return known_schemas[schema]
    except (KeyError, TypeError):
        # An unhashable argument cannot equal any schema name either, so it
        # is reported the same way as an unknown name.
        raise KeyError("Specified SCHEMA is unknown!") from None