Skip to content

Reference for youtube_dl_scraper/core/caption.py

youtube_dl_scraper.core.caption.Caption

Caption(caption_data: dict, title: str, download_path: str, translated: bool = False)

Data class for captions.

Parameters:

Name Type Description Default
caption_data dict

The raw caption data, including language code, name, and download URLs.

required
title str

The title of the associated video.

required
download_path str

The directory where captions will be downloaded.

required
translated bool

Whether the caption is translated. Defaults to False.

False
Source code in youtube_dl_scraper/core/caption.py
def __init__(
    self,
    caption_data: dict,
    title: str,
    download_path: str,
    translated: bool = False,
):
    """
    Build a Caption from the raw caption payload of a video.

    Args:
        caption_data (dict): Raw caption payload. The ``"code"`` and ``"name"``
            keys are read here and must be present (a missing key raises
            ``KeyError``); the whole dict is kept on the instance unchanged.
        title (str): Title of the video the caption belongs to.
        download_path (str): Default directory for downloaded caption files.
        translated (bool, optional): Marks the caption as a translation.
            Defaults to False.
    """
    # Keep a reference to the untouched payload alongside the plain metadata.
    self.raw_caption_data = payload = caption_data
    self.title, self.translated = title, translated

    # Language identifiers are mandatory, hence subscripting instead of .get().
    self.lang = payload["code"]
    self.lang_name = payload["name"]

    self.download_dir = download_path
raw property
raw: str

Retrieve the raw caption content in SRT format.

Returns:

Name Type Description
str str

The raw SRT content as a string.

Raises:

Type Description
NotImplementedError

If the caption does not support the SRT format.

srt
srt(content: bool = False, download_path: Optional[str] = None, filename: Optional[str] = None, skip_existent: bool = False) -> Union[str, Path]

Download or retrieve the caption in SRT format.

Parameters:

Name Type Description Default
content bool

If True, return the content as a string; if False, save it to disk. Defaults to False.

False
download_path Optional[str]

The directory to save the file. Defaults to self.download_dir.

None
filename Optional[str]

The name of the file. Extracted from the content-disposition header if not provided.

None
skip_existent bool

If True, skips downloading if a matching file already exists. Defaults to False.

False

Returns:

Type Description
Union[str, Path]

File path if content is False, otherwise the SRT content as a string.

Raises:

Type Description
NotImplementedError

If the caption does not support the SRT format.

FileNotFoundError

If the specified file path is invalid.

PermissionError

If permissions are insufficient.

IsADirectoryError

If the specified file path is a directory.

IOError

For I/O-related errors.

OSError

For OS-level errors.

Source code in youtube_dl_scraper/core/caption.py
def srt(
    self,
    content: bool = False,
    download_path: Optional[str] = None,
    filename: Optional[str] = None,
    skip_existent: bool = False,
) -> Union[str, Path]:
    """
    Download or retrieve the caption in SRT format.

    The caption is always fetched over HTTP first; ``content`` only decides
    whether the payload is returned as text or written to disk.

    Args:
        content (bool, optional): If True, return the content as a string; if False, save it to disk. Defaults to False.
        download_path (Optional[str], optional): The directory to save the file. Defaults to self.download_dir.
        filename (Optional[str], optional): The name of the file. Extracted from the content-disposition header if not provided.
        skip_existent (bool, optional): If True, skips saving when a file of the same size already exists at the target path. Defaults to False.

    Returns:
        Union[str, Path]: File path if content is False, otherwise the SRT content as a string.

    Raises:
        NotImplementedError: If the caption does not support the SRT format.
        requests.exceptions.RequestException: If the HTTP request fails, times out,
            or returns an error status.
        FileNotFoundError: If the specified file path is invalid.
        PermissionError: If permissions are insufficient.
        IsADirectoryError: If the specified file path is a directory.
        OSError: For any other I/O or OS-level error (IOError is the same class in Python 3).
    """
    dl_link = self.raw_caption_data.get("urls", dict()).get("srt")
    if not dl_link:
        raise NotImplementedError("caption object don't support the srt format")

    # Bound the request so a stalled server cannot block the caller forever.
    response = requests.get(dl_link, timeout=30)
    response.raise_for_status()

    if content:
        return response.content.decode("utf-8")

    if not filename:
        # NOTE(review): get_filename_from_cd presumably parses the
        # Content-Disposition header; its behavior for a missing header is
        # not visible here - confirm it never returns None.
        filename = get_filename_from_cd(response.headers.get("content-disposition"))
        # The header is server-controlled: keep only the last path component
        # so a name such as "../../x" cannot escape the download directory.
        if isinstance(filename, str):
            filename = Path(filename).name

    filepath = Path(download_path or self.download_dir).joinpath(filename)

    # The payload is already in memory, so "same file" is judged by byte size.
    if skip_existent and filepath.exists():
        if filepath.stat().st_size == len(response.content):
            print("skipping save because file already exists")
            return filepath.resolve()

    print("Saving file")

    try:
        with filepath.open("wb") as file:
            file.write(response.content)
        return filepath.resolve()
    except FileNotFoundError:
        print("The specified file was not found.")
        raise
    except PermissionError:
        print("You do not have permission to access this file.")
        raise
    except IsADirectoryError:
        print("Expected a file but found a directory.")
        raise
    except OSError:
        # IOError is an alias of OSError in Python 3, so a single handler
        # covers both; a second one after it could never run.
        print("An IOError occurred.")
        raise
txt
txt(content: bool = False, download_path: Optional[str] = None, filename: Optional[str] = None, skip_existent: bool = False) -> Union[str, Path]

Download or retrieve the caption in TXT format.

Parameters:

Name Type Description Default
content bool

If True, return the content as a string; if False, save it to disk. Defaults to False.

False
download_path Optional[str]

The directory to save the file. Defaults to self.download_dir.

None
filename Optional[str]

The name of the file. Extracted from the content-disposition header if not provided.

None
skip_existent bool

If True, skips downloading if a matching file already exists. Defaults to False.

False

Returns:

Type Description
Union[str, Path]

File path if content is False, otherwise the TXT content as a string.

Raises:

Type Description
NotImplementedError

If the caption does not support the TXT format.

FileNotFoundError

If the specified file path is invalid.

PermissionError

If permissions are insufficient.

IsADirectoryError

If the specified file path is a directory.

IOError

For I/O-related errors.

OSError

For OS-level errors.

Source code in youtube_dl_scraper/core/caption.py
def txt(
    self,
    content: bool = False,
    download_path: Optional[str] = None,
    filename: Optional[str] = None,
    skip_existent: bool = False,
) -> Union[str, Path]:
    """
    Download or retrieve the caption in TXT format.

    The caption is always fetched over HTTP first; ``content`` only decides
    whether the payload is returned as text or written to disk.

    Args:
        content (bool, optional): If True, return the content as a string; if False, save it to disk. Defaults to False.
        download_path (Optional[str], optional): The directory to save the file. Defaults to self.download_dir.
        filename (Optional[str], optional): The name of the file. Extracted from the content-disposition header if not provided.
        skip_existent (bool, optional): If True, skips saving when a file of the same size already exists at the target path. Defaults to False.

    Returns:
        Union[str, Path]: File path if content is False, otherwise the TXT content as a string.

    Raises:
        NotImplementedError: If the caption does not support the TXT format.
        requests.exceptions.RequestException: If the HTTP request fails, times out,
            or returns an error status.
        FileNotFoundError: If the specified file path is invalid.
        PermissionError: If permissions are insufficient.
        IsADirectoryError: If the specified file path is a directory.
        OSError: For any other I/O or OS-level error (IOError is the same class in Python 3).
    """
    dl_link = self.raw_caption_data.get("urls", dict()).get("txt")
    if not dl_link:
        raise NotImplementedError("caption object don't support the txt format")

    # Bound the request so a stalled server cannot block the caller forever.
    response = requests.get(dl_link, timeout=30)
    response.raise_for_status()

    if content:
        return response.content.decode("utf-8")

    if not filename:
        # NOTE(review): get_filename_from_cd presumably parses the
        # Content-Disposition header; its behavior for a missing header is
        # not visible here - confirm it never returns None.
        filename = get_filename_from_cd(response.headers.get("content-disposition"))
        # The header is server-controlled: keep only the last path component
        # so a name such as "../../x" cannot escape the download directory.
        if isinstance(filename, str):
            filename = Path(filename).name

    filepath = Path(download_path or self.download_dir).joinpath(filename)

    # The payload is already in memory, so "same file" is judged by byte size.
    if skip_existent and filepath.exists():
        if filepath.stat().st_size == len(response.content):
            print("skipping save because file already exists")
            return filepath.resolve()

    # Announce every save, not only overwrites of an existing file.
    print("Saving file")

    try:
        with filepath.open("wb") as file:
            file.write(response.content)
        return filepath.resolve()
    except FileNotFoundError:
        print("The specified file was not found.")
        raise
    except PermissionError:
        print("You do not have permission to access this file.")
        raise
    except IsADirectoryError:
        print("Expected a file but found a directory.")
        raise
    except OSError:
        # IOError is an alias of OSError in Python 3, so a single handler
        # covers both; a second one after it could never run.
        print("An IOError occurred.")
        raise