`caption.py`¶

youtube_dl_scraper.core.caption.Caption ¶

Caption(caption_data: dict, title: str, download_path: str, translated: bool = False)

Data class for captions.

Parameters:

Name	Type	Description	Default
`caption_data`	`dict`	The raw caption data, including language code, name, and download URLs.	required
`title`	`str`	The title of the associated video.	required
`download_path`	`str`	The directory where captions will be downloaded.	required
`translated`	`bool`	Whether the caption is translated. Defaults to False.	`False`

Source code in youtube_dl_scraper/core/caption.py

def __init__(
    self,
    caption_data: dict,
    title: str,
    download_path: str,
    translated: bool = False,
):
    """
    Build a Caption from the raw caption record of a video.

    Args:
        caption_data (dict): Raw caption record. The "code" and "name" keys are
            read here and are therefore mandatory; the whole dict is kept on
            the instance as ``raw_caption_data``.
        title (str): Title of the video the caption belongs to.
        download_path (str): Default directory for saved caption files.
        translated (bool, optional): Marks the caption as a translation.
            Defaults to False.

    Raises:
        KeyError: If ``caption_data`` has no "code" or "name" entry.
    """
    # Plain pass-through attributes first.
    self.raw_caption_data = caption_data
    self.title, self.translated = title, translated

    # Language identifiers are pulled out of the raw record for quick access.
    self.lang = caption_data["code"]
    self.lang_name = caption_data["name"]

    # Stored under a different name than the constructor argument.
    self.download_dir = download_path

raw `property` ¶

raw: str

Retrieve the raw caption content in SRT format.

Returns:

Name	Type	Description
`str`	`str`	The raw SRT content as a string.

Raises:

Type	Description
`NotImplementedError`	If the caption does not support the SRT format.

srt ¶

srt(content: bool = False, download_path: Optional[str] = None, filename: Optional[str] = None, skip_existent: bool = False) -> Union[str, Path]

Download or retrieve the caption in SRT format.

Parameters:

Name	Type	Description	Default
`content`	`bool`	If True, return the content as a string; if False, save it to disk. Defaults to False.	`False`
`download_path`	`Optional[str]`	The directory to save the file. Defaults to self.download_dir.	`None`
`filename`	`Optional[str]`	The name of the file. Extracted from the content-disposition header if not provided.	`None`
`skip_existent`	`bool`	If True, skips saving when a file with the same name and the same size in bytes already exists (the caption is still downloaded in order to compare sizes). Defaults to False.	`False`

Returns:

Type	Description
`Union[str, Path]`	Union[str, Path]: File path if content is False, otherwise the SRT content as a string.

Raises:

Type	Description
`NotImplementedError`	If the caption does not support the SRT format.
`FileNotFoundError`	If the specified file path is invalid.
`PermissionError`	If permissions are insufficient.
`IsADirectoryError`	If the specified file path is a directory.
`IOError`	For I/O-related errors.
`OSError`	For OS-level errors.

Source code in youtube_dl_scraper/core/caption.py

def srt(
    self,
    content: bool = False,
    download_path: Optional[str] = None,
    filename: Optional[str] = None,
    skip_existent: bool = False,
) -> Union[str, Path]:
    """
    Download or retrieve the caption in SRT format.

    The caption is always fetched over HTTP first; ``content`` only decides
    whether the payload is returned as text or written to disk.

    Args:
        content (bool, optional): If True, return the content as a string; if False, save it to disk. Defaults to False.
        download_path (Optional[str], optional): The directory to save the file. Defaults to self.download_dir.
        filename (Optional[str], optional): The name of the file. Extracted from the content-disposition header if not provided.
        skip_existent (bool, optional): If True, do not rewrite the file when one with the same
            name and the same size in bytes already exists. The caption is still downloaded in
            order to compare sizes. Defaults to False.

    Returns:
        Union[str, Path]: Resolved file path if content is False, otherwise the SRT content decoded as UTF-8.

    Raises:
        NotImplementedError: If the caption data has no SRT download URL.
        requests.HTTPError: If the server answers with an error status.
        FileNotFoundError: If the specified file path is invalid.
        PermissionError: If permissions are insufficient.
        IsADirectoryError: If the specified file path is a directory.
        OSError: For any other I/O or OS-level error (IOError is the same class).
    """
    dl_link = self.raw_caption_data.get("urls", {}).get("srt")
    if not dl_link:
        raise NotImplementedError("caption object don't support the srt format")

    # NOTE(review): no timeout is passed, so a stalled server blocks this call
    # indefinitely - consider adding one.
    response = requests.get(dl_link)
    response.raise_for_status()
    payload = response.content

    if content:
        return payload.decode("utf-8")

    # NOTE(review): get_filename_from_cd is defined elsewhere; presumably it can
    # return None when the header is missing, which would make joinpath fail
    # with a TypeError - confirm and add a fallback name if so.
    filename = filename or get_filename_from_cd(
        response.headers.get("content-disposition")
    )
    filepath = Path(download_path or self.download_dir).joinpath(filename)

    # Same name and same byte size is treated as "already downloaded".
    if (
        skip_existent
        and filepath.exists()
        and filepath.stat().st_size == len(payload)
    ):
        print("skipping save because file already exists")
        return filepath.resolve()

    print("Saving file")

    try:
        with filepath.open("wb") as file:
            file.write(payload)
        return filepath.resolve()
    except FileNotFoundError:
        print("The specified file was not found.")
        raise
    except PermissionError:
        print("You do not have permission to access this file.")
        raise
    except IsADirectoryError:
        print("Expected a file but found a directory.")
        raise
    except OSError:
        # IOError is an alias of OSError in Python 3, so a single clause covers
        # both; a separate OSError handler after it could never run.
        print("An IOError occurred.")
        raise

txt ¶

txt(content: bool = False, download_path: Optional[str] = None, filename: Optional[str] = None, skip_existent: bool = False) -> Union[str, Path]

Download or retrieve the caption in TXT format.

Parameters:

Name	Type	Description	Default
`content`	`bool`	If True, return the content as a string; if False, save it to disk. Defaults to False.	`False`
`download_path`	`Optional[str]`	The directory to save the file. Defaults to self.download_dir.	`None`
`filename`	`Optional[str]`	The name of the file. Extracted from the content-disposition header if not provided.	`None`
`skip_existent`	`bool`	If True, skips saving when a file with the same name and the same size in bytes already exists (the caption is still downloaded in order to compare sizes). Defaults to False.	`False`

Returns:

Type	Description
`Union[str, Path]`	Union[str, Path]: File path if content is False, otherwise the TXT content as a string.

Raises:

Type	Description
`NotImplementedError`	If the caption does not support the TXT format.
`FileNotFoundError`	If the specified file path is invalid.
`PermissionError`	If permissions are insufficient.
`IsADirectoryError`	If the specified file path is a directory.
`IOError`	For I/O-related errors.
`OSError`	For OS-level errors.

Source code in youtube_dl_scraper/core/caption.py

def txt(
    self,
    content: bool = False,
    download_path: Optional[str] = None,
    filename: Optional[str] = None,
    skip_existent: bool = False,
) -> Union[str, Path]:
    """
    Download or retrieve the caption in TXT format.

    The caption is always fetched over HTTP first; ``content`` only decides
    whether the payload is returned as text or written to disk.

    Args:
        content (bool, optional): If True, return the content as a string; if False, save it to disk. Defaults to False.
        download_path (Optional[str], optional): The directory to save the file. Defaults to self.download_dir.
        filename (Optional[str], optional): The name of the file. Extracted from the content-disposition header if not provided.
        skip_existent (bool, optional): If True, do not rewrite the file when one with the same
            name and the same size in bytes already exists. The caption is still downloaded in
            order to compare sizes. Defaults to False.

    Returns:
        Union[str, Path]: Resolved file path if content is False, otherwise the TXT content decoded as UTF-8.

    Raises:
        NotImplementedError: If the caption data has no TXT download URL.
        requests.HTTPError: If the server answers with an error status.
        FileNotFoundError: If the specified file path is invalid.
        PermissionError: If permissions are insufficient.
        IsADirectoryError: If the specified file path is a directory.
        OSError: For any other I/O or OS-level error (IOError is the same class).
    """
    dl_link = self.raw_caption_data.get("urls", {}).get("txt")
    if not dl_link:
        raise NotImplementedError("caption object don't support the txt format")

    # NOTE(review): no timeout is passed, so a stalled server blocks this call
    # indefinitely - consider adding one.
    response = requests.get(dl_link)
    response.raise_for_status()
    payload = response.content

    if content:
        return payload.decode("utf-8")

    # NOTE(review): get_filename_from_cd is defined elsewhere; presumably it can
    # return None when the header is missing, which would make joinpath fail
    # with a TypeError - confirm and add a fallback name if so.
    filename = filename or get_filename_from_cd(
        response.headers.get("content-disposition")
    )
    filepath = Path(download_path or self.download_dir).joinpath(filename)

    # Same name and same byte size is treated as "already downloaded".
    if (
        skip_existent
        and filepath.exists()
        and filepath.stat().st_size == len(payload)
    ):
        print("skipping save because file already exists")
        return filepath.resolve()

    # Announce every save, not only the size-mismatch case, matching srt().
    print("Saving file")

    try:
        with filepath.open("wb") as file:
            file.write(payload)
        return filepath.resolve()
    except FileNotFoundError:
        print("The specified file was not found.")
        raise
    except PermissionError:
        print("You do not have permission to access this file.")
        raise
    except IsADirectoryError:
        print("Expected a file but found a directory.")
        raise
    except OSError:
        # IOError is an alias of OSError in Python 3, so a single clause covers
        # both; a separate OSError handler after it could never run.
        print("An IOError occurred.")
        raise

Reference for youtube_dl_scraper/core/caption.py¶

youtube_dl_scraper.core.caption.Caption ¶

raw property ¶

srt ¶

txt ¶

Reference for `youtube_dl_scraper/core/caption.py`¶

raw `property` ¶