
Reference for youtube_dl_scraper/site_scrapers/caption_scrapers/downsub.py

youtube_dl_scraper.site_scrapers.caption_scrapers.downsub.DownSub

DownSub(download_path: str)

Bases: BaseScraper

A scraper wrapper for downsub.com

Source code in youtube_dl_scraper/core/base_scraper.py
def __init__(self, download_path: str):
    self.download_path = download_path
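Example: instantiating the scraper (a minimal sketch; the download path is an arbitrary placeholder):

from youtube_dl_scraper.site_scrapers.caption_scrapers.downsub import DownSub

scraper = DownSub(download_path="/tmp/captions")  # directory for downloads (placeholder)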
generate_payload
generate_payload(url: str) -> str

Generate the payload used to fetch the caption endpoint.

Source code in youtube_dl_scraper/site_scrapers/caption_scrapers/downsub.py
def generate_payload(self, url: str) -> str:
    """Generate the payload used to fetch the caption endpoint."""
    return f"url = '{url}'\n" + payload
parse_caption_data
parse_caption_data(data: dict) -> dict

Parse data from the caption endpoint into a general format.

Source code in youtube_dl_scraper/site_scrapers/caption_scrapers/downsub.py
def parse_caption_data(self, data: dict) -> dict:
    """Parse data from the caption endpoint into a general format."""
    if data.get("sourceName") != "Youtube":
        raise CaptionsNotFoundError("Caption found, but the caption type is invalid")
    captions_data = {}  # the formatted dictionary for caption data
    dl_api = data.get("urlSubtitle")
    captions_data["title"] = data.get("title", "")
    captions_data["thumbnail"] = data.get("thumbnail", "")
    captions_data["duration"] = data.get("duration")
    captions_data["subtitles"] = []
    for sub in data.get("subtitles", []):
        sub["code"] = (
            "a." + str(find_language(sub["name"]))
            if "auto" in sub["code"]
            else sub["code"]
        )
        captions_data["subtitles"].append(sub)
    captions_data["translations"] = []
    for sub in data.get("subtitlesAutoTrans", []):
        sub["code"] = str(find_language(sub["name"]))  # fetch lang code
        captions_data["translations"].append(sub)
    # add download URLs for each format, i.e. srt, txt, raw
    translation_types = ("subtitles", "translations")
    for t_type in translation_types:
        subs = captions_data[t_type]  # subtitle dict
        for sub in subs:  # for subtitle in subtitles
            sub["urls"] = {
                "raw": f"{dl_api}?url={sub['url']}&type=raw&title={captions_data['title']}",
                "txt": f"{dl_api}?url={sub['url']}&type=txt&title={captions_data['title']}",
                "srt": f"{dl_api}?url={sub['url']}&title={captions_data['title']}",
            }
            sub.pop("url")

    return captions_data
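To illustrate the transformation, here is a hypothetical endpoint response containing only the keys this parser reads, followed by the shape of the entry it produces (all values are invented for illustration):

raw = {
    "sourceName": "Youtube",
    "urlSubtitle": "https://example.invalid/download",  # hypothetical download API
    "title": "Sample Video",
    "thumbnail": "https://example.invalid/thumb.jpg",
    "duration": 212,
    "subtitles": [{"name": "English", "code": "en", "url": "TOKEN"}],
    "subtitlesAutoTrans": [{"name": "French", "url": "TOKEN2"}],
}

# parse_caption_data(raw)["subtitles"][0] would then look like:
# {
#     "name": "English",
#     "code": "en",
#     "urls": {
#         "raw": "https://example.invalid/download?url=TOKEN&type=raw&title=Sample Video",
#         "txt": "https://example.invalid/download?url=TOKEN&type=txt&title=Sample Video",
#         "srt": "https://example.invalid/download?url=TOKEN&title=Sample Video",
#     },
# }
# Note the original "url" key is popped once the "urls" dict is built.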
process_response
process_response(caption_endpoint: str) -> dict

Fetch and process data from the caption endpoint.

Source code in youtube_dl_scraper/site_scrapers/caption_scrapers/downsub.py
def process_response(self, caption_endpoint: str) -> dict:
    """Fetch and process data from the caption endpoint."""
    if not caption_endpoint:
        raise CaptionsNotFoundError("Caption endpoint is empty.")
    response = requests.get(caption_endpoint)
    if response.status_code == 200:
        data = response.json()
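        # may return None (despite the dict annotation) when the response body is empty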
        return self.parse_caption_data(data) if data else None
    else:
        raise YouTubeDLScraperError(
            "Invalid response code: {}".format(response.status_code)
        )
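Putting it together, a sketch under stated assumptions: the caption endpoint URL must come from an earlier scraping step not documented on this page, and the exception's import path below is an assumption:

from youtube_dl_scraper.site_scrapers.caption_scrapers.downsub import DownSub
# assumed import path for the exception; adjust to where it lives in this codebase
from youtube_dl_scraper.core.exceptions import CaptionsNotFoundError

scraper = DownSub(download_path="/tmp/captions")
caption_endpoint = "https://example.invalid/caption-endpoint"  # hypothetical; obtained elsewhere

try:
    captions = scraper.process_response(caption_endpoint)
except CaptionsNotFoundError:
    captions = None

if captions:
    for sub in captions["subtitles"]:
        print(sub["name"], sub["urls"]["srt"])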