# Usage example: $ python captions-download.py Txvud7wPbv4 from __future__ import print_function from apiclient import discovery from httplib2 import Http from oauth2client import file, client, tools import os,json,pprint,getopt,sys import unicodedata import string valid_filename_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) def clean_filename(filename, whitelist=valid_filename_chars, replace=' '): # replace spaces for r in replace: filename = filename.replace(r,'_') # keep only valid ascii chars cleaned_filename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').decode() # keep only whitelisted chars return ''.join(c for c in cleaned_filename if c in whitelist) SCOPES = 'https://www.googleapis.com/auth/youtube.force-ssl' store = file.Storage(os.path.expanduser('~/google-api/storage.json')) creds = store.get() if not creds or creds.invalid: flow = client.flow_from_clientsecrets(os.path.expanduser('~/google-api/client_secret.json', SCOPES)) creds = tools.run_flow(flow, store) YOUTUBE = discovery.build('youtube', 'v3', http=creds.authorize(Http())) opt_pl = None download_srt = True download_vtt = False try: opts, args = getopt.getopt(sys.argv[1:], "p:vs") except getopt.GetoptError as err: print(err) sys.exit(2) for o,a in opts: if o == "-p": opt_pl = a elif o == "-s": download_srt = not download_srt elif o == "-v": download_vtt = not download_vtt else: assert False, "unhandled option" def print_response(response): pp = pprint.PrettyPrinter(width=41, compact=True) pp.pprint(response) response = YOUTUBE.playlists().list(part='snippet,contentDetails', mine=True, maxResults=25).execute() pl_id = None for pl in response['items']: snippet = pl['snippet'] print('Playlist', snippet['title'], "has id", pl['id']) if opt_pl and opt_pl == snippet['title']: pl_id = pl['id'] if opt_pl is None: assert False, "You must specify a playlist" if pl_id is None: assert False, "Unable to find specified playlist" pl_dir = clean_filename( opt_pl + "-" + pl_id ) if os.path.isdir(pl_dir): assert False, "Cowardly refusing to overwrite existing directory" + pl_dir os.mkdir(pl_dir) ## ## Get all the videos in the specified playlist ## videos_in_pl = {} pageToken = None while True: ## ## Handle case of > 50 videos ## if pageToken==None: response = YOUTUBE.playlistItems().list(part='snippet,contentDetails', maxResults=5, playlistId=pl_id).execute() else: response = YOUTUBE.playlistItems().list(part='snippet,contentDetails', maxResults=5, pageToken=pageToken, playlistId=pl_id).execute() for item in response['items']: video_id = item['contentDetails']['videoId'] title = item['snippet']['title'] videos_in_pl[video_id] = title print("Got ", video_id, "->", title) if 'nextPageToken' in response: pageToken = response['nextPageToken'] else: break with open(pl_dir + "/" + "README", "w") as readme: readme.write("Videos are:\n") for vid in videos_in_pl: readme.write(vid + " -> " + videos_in_pl[vid] + "\n") ## ## We'll construct the upload script as we process each file ## upload_script = open(pl_dir + "/" + "UPLOAD-SCRIPT", "w") ## ## Now, process each video, downloading all captions ## for vid in videos_in_pl: vid_dir = pl_dir + '/' + vid os.makedirs(vid_dir) caption_info = YOUTUBE.captions().list( part='id,snippet', videoId=vid).execute().get('items', []) with open(vid_dir + '/' + 'captions.json', 'w') as capdir: pp = pprint.PrettyPrinter(width=41, stream=capdir, compact=True) pp.pprint(caption_info) for caption in caption_info: if download_vtt: vtt = YOUTUBE.captions().download(id=caption['id'], tfmt='vtt').execute() vtt_file = vid_dir + '/' + caption['id'] + '.vtt' with open(vtt_file, "wb") as vttfile: vttfile.write(vtt) if download_srt: srt = YOUTUBE.captions().download(id=caption['id'], tfmt='srt').execute() srt_file = vid_dir + '/' + caption['id'] + '.srt' with open(srt_file, "wb") as srtfile: srtfile.write(srt) upload_script.write("python ../captions.py --name='Edited' --language='en' --videoid='{videoid}' --captionid='{captionid}' --file='{filename}' --action=upload\n".format( videoid=vid, captionid=caption['id'], filename= vid + '/' + caption['id'] + '.srt' ) ) upload_script.close()