Reference

Naming Conventions

The extraction of year and title from an input string relies on a regex pattern. Multiple patterns are already provided.

If you feel like a pattern is missing, feel free to create a Pull Request!

Naming Convention #

Input

0

1999 The Matrix

1

1999 - The Matrix

Tip

You can click the link in the second column to get a visual representation. You can also test if your examples match!

Schemas

All dataframe column types are cast as follows:

    def _get_schema(self, schema: str) -> Dict[str, object]:
        """Returns specified schema as dictionary.

        Args:
          schema: which schema to return

        Returns:
          Dictionary containing column-type mapping.

        Raises:
          KeyError: if specified schema is not a valid schema.
        """
        if schema == "canon_input":
            return {
                "year": int,
                "title": str,
                "tmdb_id_first_pass": int,
                "tmdb_id_second_pass": int,
                "tmdb_id": int,
            }
        elif schema == "cast":
            return {
                "tmdb_id": int,
                "cast.adult": bool,
                "cast.gender": int,
                "cast.id": int,
                "cast.known_for_department": "category",
                "cast.name": str,
                "cast.original_name": str,
                "cast.popularity": float,
                "cast.profile_path": str,
                "cast.cast_id": int,
                "cast.character": str,
                "cast.credit_id": str,
                "cast.order": int,
            }
        elif schema == "crew":
            return {
                "tmdb_id": int,
                "crew.adult": bool,
                "crew.gender": int,
                "crew.id": int,
                "crew.known_for_department": "category",
                "crew.name": str,
                "crew.original_name": str,
                "crew.popularity": float,
                "crew.profile_path": str,
                "crew.credit_id": str,
                "crew.department": "category",
                "crew.job": str,
            }
        elif schema == "belongs_to_collection":
            return {
                "tmdb_id": int,
                "belongs_to_collection.id": int,
                "belongs_to_collection.name": str,
                "belongs_to_collection.poster_path": str,
                "belongs_to_collection.backdrop_path": str,
            }
        elif schema == "genres":
            return {
                "tmdb_id": int,
                "genres.id": int,
                "genres.name": str,
            }
        elif schema == "production_companies":
            return {
                "tmdb_id": int,
                "production_companies.id": int,
                "production_companies.logo_path": str,
                "production_companies.name": "category",
                "production_companies.origin_country": "category",
            }
        elif schema == "production_countries":
            return {
                "tmdb_id": int,
                "production_countries.iso_3166_1": "category",
                "production_countries.name": str,
            }
        elif schema == "spoken_languages":
            return {
                "tmdb_id": int,
                "spoken_languages.english_name": "category",
                "spoken_languages.iso_639_1": "category",
                "spoken_languages.name": str,
            }
        elif schema == "movie_details":
            return {
                "tmdb_id": int,
                "adult": bool,
                "backdrop_path": str,
                "budget": int,
                "homepage": str,
                "imdb_id": str,
                "original_language": "category",
                "original_title": str,
                "overview": str,
                "popularity": float,
                "poster_path": str,
                "release_date": "datetime64[ns]",
                "revenue": int,
                "runtime": int,
                "status": "category",
                "tagline": str,
                "title": str,
                "video": bool,
                "vote_average": float,
                "vote_count": int,
            }
        else:
            raise KeyError("Specified SCHEMA is unknown!")