# Usage example: $ python captions-download.py -p 'My Playlist Title' [-s] [-v]

from __future__ import print_function

from apiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools
import os,json,pprint,getopt,sys

import unicodedata
import string
# Characters that are allowed to survive in a sanitized file name.
valid_filename_chars = "-_.() " + string.ascii_letters + string.digits


def clean_filename(filename, whitelist=valid_filename_chars, replace=' '):
    """Return *filename* reduced to a conservative, ASCII-only file name.

    Each item of *replace* found in the name becomes an underscore, the
    result is NFKD-normalized and stripped of everything that cannot be
    encoded as ASCII, and finally only characters contained in *whitelist*
    are kept.
    """
    underscored = filename
    for target in replace:
        underscored = underscored.replace(target, '_')

    # Decompose accented characters, then drop whatever is not plain ASCII.
    decomposed = unicodedata.normalize('NFKD', underscored)
    ascii_only = decomposed.encode('ASCII', 'ignore').decode()

    # Discard every character that is not explicitly allowed.
    return ''.join(filter(lambda ch: ch in whitelist, ascii_only))


##
## Build the authorized YouTube Data API client.  Cached credentials are read
## from ~/google-api/storage.json; when they are missing or invalid the OAuth
## flow is run from ~/google-api/client_secret.json with `store` passed along.
##
SCOPES = 'https://www.googleapis.com/auth/youtube.force-ssl'
store = file.Storage(os.path.expanduser('~/google-api/storage.json'))
creds = store.get()
if not creds or creds.invalid:
    # SCOPES is an argument of flow_from_clientsecrets(), not of
    # os.path.expanduser(), which accepts a single path.
    # NOTE(review): run_flow() called without flags appears to parse sys.argv
    # itself -- confirm it tolerates this script's -p/-s/-v options.
    flow = client.flow_from_clientsecrets(
        os.path.expanduser('~/google-api/client_secret.json'), SCOPES)
    creds = tools.run_flow(flow, store)
YOUTUBE = discovery.build('youtube', 'v3', http=creds.authorize(Http()))

##
## Command-line options:
##   -p TITLE  title of the playlist to process
##   -s        toggle SRT download (enabled by default)
##   -v        toggle VTT download (disabled by default)
##
opt_pl = None
download_srt, download_vtt = True, False
try:
    opts, args = getopt.getopt(sys.argv[1:], "p:vs")
except getopt.GetoptError as err:
    print(err)
    sys.exit(2)

for flag, value in opts:
    # getopt only yields the flags declared above; anything else is a bug
    assert flag in ("-p", "-s", "-v"), "unhandled option"
    if flag == "-p":
        opt_pl = value
    if flag == "-s":
        download_srt = not download_srt
    if flag == "-v":
        download_vtt = not download_vtt

def print_response(response):
    """Pretty-print *response* to standard output, wrapped at 41 columns."""
    pprint.pprint(response, width=41, compact=True)


##
## List the authenticated user's playlists and look for the one whose title
## equals the -p option.  Every page of results is examined, so a playlist
## beyond the first page can still be found.
##
pl_id = None
playlist_page = None
while True:
    list_args = dict(part='snippet,contentDetails', mine=True, maxResults=50)
    if playlist_page is not None:
        # pageToken is only sent once a previous page supplied one
        list_args['pageToken'] = playlist_page
    response = YOUTUBE.playlists().list(**list_args).execute()

    for pl in response['items']:
        snippet = pl['snippet']
        print('Playlist', snippet['title'], "has id", pl['id'])
        if opt_pl and opt_pl == snippet['title']:
            pl_id = pl['id']

    playlist_page = response.get('nextPageToken')
    if not playlist_page:
        break

# Checked after the listing so that running without -p still prints the
# available playlists.  sys.exit() is used instead of assert because
# assertions are stripped when Python runs with -O.
if opt_pl is None:
    sys.exit("You must specify a playlist")

if pl_id is None:
    sys.exit("Unable to find specified playlist")

# Output directory: sanitized "<title>-<playlist id>"; never reuse an
# existing one.
pl_dir = clean_filename(opt_pl + "-" + pl_id)
if os.path.isdir(pl_dir):
    sys.exit("Cowardly refusing to overwrite existing directory " + pl_dir)
os.mkdir(pl_dir)

##
## Get all the videos in the specified playlist
##
videos_in_pl = {}
pageToken = None
while True:
    ##
    ## Results are paginated: ask for the API maximum of 50 items per page
    ## and follow nextPageToken until the playlist is exhausted.
    ##
    request_args = dict(part='snippet,contentDetails',
                        maxResults=50,
                        playlistId=pl_id)
    if pageToken is not None:
        # pageToken is only sent once a previous page supplied one
        request_args['pageToken'] = pageToken
    response = YOUTUBE.playlistItems().list(**request_args).execute()

    # Remember video id -> title for every item on this page.
    for item in response['items']:
        video_id = item['contentDetails']['videoId']
        title = item['snippet']['title']
        videos_in_pl[video_id] = title
        print("Got ", video_id, "->", title)

    if 'nextPageToken' in response:
        pageToken = response['nextPageToken']
    else:
        break

##
## Record the video id -> title mapping in a README inside the playlist
## directory.
##
# Explicit UTF-8: video titles may contain non-ASCII characters that the
# platform's default encoding cannot represent.
with open(pl_dir + "/" + "README", "w", encoding="utf-8") as readme:
    readme.write("Videos are:\n")
    for vid, vid_title in videos_in_pl.items():
        readme.write(vid + " -> " + vid_title + "\n")


##
## We'll construct the upload script as we process each file.  It is opened
## in a `with` block so it is closed even if an API call fails midway.
##
with open(pl_dir + "/" + "UPLOAD-SCRIPT", "w") as upload_script:
    ##
    ## Now, process each video, downloading all captions
    ##
    for vid in videos_in_pl:
        vid_dir = pl_dir + '/' + vid
        os.makedirs(vid_dir)
        caption_info = YOUTUBE.captions().list(
            part='id,snippet', videoId=vid).execute().get('items', [])

        # NOTE(review): despite the .json name this file holds pprint output
        # (Python repr), not JSON -- confirm nothing parses it as JSON.
        # Explicit UTF-8 in case the caption metadata contains non-ASCII text.
        with open(vid_dir + '/' + 'captions.json', 'w', encoding='utf-8') as capdir:
            pp = pprint.PrettyPrinter(width=41, stream=capdir, compact=True)
            pp.pprint(caption_info)

        for caption in caption_info:
            # VTT first, then SRT; each only when enabled on the command line.
            for wanted, tfmt in ((download_vtt, 'vtt'), (download_srt, 'srt')):
                if not wanted:
                    continue
                track = YOUTUBE.captions().download(id=caption['id'], tfmt=tfmt).execute()
                with open(vid_dir + '/' + caption['id'] + '.' + tfmt, "wb") as track_file:
                    track_file.write(track)

            # One upload command per downloaded SRT track; the file path is
            # written relative to the playlist directory.
            if download_srt:
                upload_script.write("python ../captions.py --name='Edited' --language='en' --videoid='{videoid}' --captionid='{captionid}' --file='{filename}' --action=upload\n".format(
                    videoid=vid,
                    captionid=caption['id'],
                    filename=vid + '/' + caption['id'] + '.srt'))