#!/usr/bin/env python
# encoding=utf-8

"""TSN post-processor.

Finds recordings that took place today between 21:00 and 23:00, downloads them,
combines them, cleans them up with sox and encodes the result using lame.

Part of the tmradio.net toolkit.

Example crontab line:
10 23 * * 4 TSN_NOISE_PROFILE=~/noise-sample.noise-profile ~/bin/tmradio-process-tsn 2>&1 | mail -s "TSN post-processing status" info@tmradio.net

Debian/Ubuntu dependencies:
python-feedparser python-mutagen sox libsox-fmt-mp3 lame mp3gain s3cmd

License: Public Domain.
"""


import glob
import os
import subprocess
import sys
import tempfile
import time
import traceback
import urllib2

import feedparser
import mutagen.easyid3


# RSS feed listing the raw stream dumps; find_urls() reads the enclosure URL of
# each entry from it.
FEED_URL = 'http://stream.tmradio.net/dump/live.xml'
# RSS feed of the published episodes; find_episode_id() derives the next
# episode number from the id of its first entry.
TSN_FEED_URL = 'http://rss.tmradio.net/tmradio/tsn'
# Noise profile passed to the sox "noisered" effect (override: TSN_NOISE_PROFILE).
NOISE_PROFILE = 'noise-sample.noise-profile'
# scp destination used by upload_file_scp() (overrides: TSN_UPLOAD_HOST and
# TSN_UPLOAD_PATH).  The path is a template that takes the episode number.
UPLOAD_HOST = 'stream.tmradio.net'
UPLOAD_PATH = '/radio/sites/files.tmradio.net/audio/sosonews/sosonews-%02u.mp3'
# s3cmd destination template used by upload_file_s3(); takes the episode number.
S3_UPLOAD_PATH = 's3://files.tmradio.net/audio/sosonews/sosonews-%02u.mp3'
# URL template that main() prints once the upload is done.
DOWNLOAD_URL = 'http://files.tmradio.net/audio/sosonews/sosonews-%02u.mp3'
# Folder for the downloaded files (override: TSN_TEMP_FOLDER).  main() deletes
# every "tsn_*" file found in it before doing anything else.
TEMP_FOLDER = '/tmp'

# Not referenced anywhere else in the visible part of this file.
tsn_episode_number = 0
# Timestamp of the recording; set by find_urls(), read by main().
tsn_episode_date = None


def fetch_file(url):
    """Downloads the file, returns the name of a temporary file (make sure you
    delete it after use).

    The temporary file is created in TEMP_FOLDER with a "tsn_" prefix and an
    ".mp3" suffix.  Both the HTTP response and the output file are closed even
    if the transfer fails half way; the exception is then passed on to the
    caller.
    """
    # Open the URL first: if the request fails there is no temporary file (and
    # no open descriptor) left behind.
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        fd, filename = tempfile.mkstemp(prefix='tsn_', suffix='.mp3', dir=TEMP_FOLDER)
        print('Downloading %s as %s' % (url, filename))

        # fdopen() takes ownership of the descriptor, closing the file object
        # closes the descriptor too.
        out = os.fdopen(fd, 'wb')
        try:
            while True:
                data = response.read(16384)
                if not data:
                    break
                out.write(data)
        finally:
            out.close()
    finally:
        response.close()

    return filename


def unlink(filename):
    """Removes the file if it exists, reporting its size first.

    A missing file is silently ignored.
    """
    if not os.path.exists(filename):
        return

    size = os.stat(filename).st_size
    print('Deleting %s (was %u bytes long).' % (filename, size))
    os.unlink(filename)


def run_command(cmd):
    """Prints the command (a list of arguments), runs it and waits for it to
    finish.  Returns True if the exit status was zero, False otherwise."""
    command_line = u' '.join(cmd)
    print(u'> ' + command_line)

    exit_status = subprocess.call(cmd)
    return exit_status == 0


def find_urls():
    """Returns the sorted urls that correspond to the live show.

    Walks the FEED_URL feed and keeps the recordings whose file name carries a
    time between 20:55 and 23:00.  The first accepted recording fixes the
    date: later entries with a different date are skipped.  URLs must be of
    this form:

    http://stream.tmradio.net/dump/live-20110623-2304.mp3

    Entries without an enclosure and URLs that do not end with a numeric time
    are skipped instead of aborting the whole run.

    Side effect: the timestamp of the last accepted entry is stored in the
    module-level tsn_episode_date.
    """
    global tsn_episode_date

    urls = []
    prefix = None

    for entry in feedparser.parse(FEED_URL)['entries']:
        try:
            url = entry['enclosures'][0]['href']
        except (KeyError, IndexError):
            print('Skipped: an entry without an enclosure')
            continue

        if prefix is not None and not url.startswith(prefix):
            print('Skipped: %s (wrong date)' % url)
            continue

        # The four characters before the extension are the time, HHMM.
        try:
            ts = int(url[-8:-4])
        except ValueError:
            print('Skipped: %s (unknown format)' % url)
            continue

        if ts < 2055 or ts > 2300:
            print('Skipped: %s (wrong time)' % url)
            continue

        urls.append(url)
        print('Found %s' % url)
        # NOTE(review): mktime() treats the tuple as local time; confirm which
        # time zone the feed parser reports in 'updated_parsed'.
        tsn_episode_date = time.mktime(entry['updated_parsed'])
        if prefix is None:
            # Everything up to and including the date, e.g. ".../live-20110623".
            prefix = url[:-9]

    return sorted(urls)


def download_files(urls):
    """Fetches every url in turn and returns the list of the temporary file
    names, in the same order."""
    return [fetch_file(url) for url in urls]


def check_noise_profile():
    """Raises an exception unless the NOISE_PROFILE file exists."""
    if os.path.exists(NOISE_PROFILE):
        return
    raise Exception('File %s not found, unable to proceed.' % NOISE_PROFILE)


def check_binaries():
    """Makes sure that sox, lame and mp3gain are installed.

    Looks for an executable file with each of these names in the folders
    listed in the PATH environment variable (os.defpath is used when PATH is
    not set).

    Raises an Exception that names the first missing program.
    """
    folders = os.environ.get('PATH', os.defpath).split(os.pathsep)

    for program in ('sox', 'lame', 'mp3gain'):
        for folder in folders:
            candidate = os.path.join(folder, program)
            # A directory or a non-executable file with the right name is not
            # a usable program.
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                break
        else:
            raise Exception('Please install %s.' % program)


def check_environment():
    """Overrides the module settings with the values of the TSN_* environment
    variables, where such variables are set."""
    overrides = (
        ('TSN_NOISE_PROFILE', 'NOISE_PROFILE'),
        ('TSN_UPLOAD_HOST', 'UPLOAD_HOST'),
        ('TSN_UPLOAD_PATH', 'UPLOAD_PATH'),
        ('TSN_TEMP_FOLDER', 'TEMP_FOLDER'),
    )

    settings = globals()
    for variable, setting in overrides:
        value = os.environ.get(variable)
        if value is None:
            # Not set: keep whatever the module currently has.
            continue
        settings[setting] = value
        if setting == 'TEMP_FOLDER':
            print('Will store temporary files in %s' % value)


def find_episode_id():
    """Returns the next episode id.

    Takes the id of the first entry of the TSN_FEED_URL feed, extracts the
    number of that episode and adds one to it.  The number is the last
    "/"-separated part of the id when the third part is wiki.tmradio.net, and
    the last but one part otherwise.

    Raises Exception if the feed has no entries and ValueError if the id does
    not have the expected shape.
    """
    entries = feedparser.parse(TSN_FEED_URL)['entries']
    if not entries:
        raise Exception("Empty RSS feed: %s" % TSN_FEED_URL)

    episode_url = entries[0]['id']
    parts = episode_url.split('/')
    # Too few parts to tell the host from the number: report it as a format
    # problem rather than failing with an IndexError below.
    if len(parts) < 3:
        raise ValueError('Unknown episode URL format: ' + episode_url)

    if parts[2] == 'wiki.tmradio.net':
        current = parts[-1]
    else:
        current = parts[-2]
    if not current.isdigit():
        raise ValueError('Unknown episode URL format: ' + episode_url)
    return int(current) + 1


def transcode_files(filenames):
    """Works on the specified files, returns the new temporary file name (final output).

    The files are joined and cleaned up by sox (noise reduction with
    NOISE_PROFILE, silence removal, normalization) into a temporary wav file,
    which lame then encodes to mp3.  The intermediate wav file and the source
    files are deleted in any case.

    Raises Exception if sox or lame exits with a non-zero status; the
    unfinished output file is deleted in that case.
    """
    # mkstemp() returns an open descriptor, close it: only the names are
    # needed, sox and lame open the files themselves.  The files go to
    # TEMP_FOLDER so that the "tsn_*" cleanup in main() can find them.
    wav_fd, temp_wav = tempfile.mkstemp(prefix='tsn_', suffix='.wav', dir=TEMP_FOLDER)
    os.close(wav_fd)
    mp3_fd, temp_mp3 = tempfile.mkstemp(prefix='tsn_', suffix='.mp3', dir=TEMP_FOLDER)
    os.close(mp3_fd)

    sox_cmd = ['sox'] + filenames + [temp_wav, 'noisered', NOISE_PROFILE, '0.2', 'silence', '-l', '1', '0.2', '-50d', '-1', '0.2', '-50d', 'norm']
    lame_cmd = ['lame', '--quiet', '--preset', 'extreme', '-B', '128', '--resample', '44100', temp_wav, temp_mp3]

    try:
        if not run_command(sox_cmd):
            raise Exception('sox failed, unable to proceed.')
        if not run_command(lame_cmd):
            raise Exception('lame failed, unable to proceed.')
    except Exception:
        # Do not leave a broken output file around.
        unlink(temp_mp3)
        raise
    finally:
        unlink(temp_wav)
        for fn in filenames:
            unlink(fn)

    print('Output is in %s' % temp_mp3)
    return temp_mp3


def write_file_metadata(filename, episode_id, episode_ts):
    """Tags the file with the track number, artist and title of the episode.

    The title contains the episode number and the date (dd.mm.yyyy) that
    episode_ts corresponds to in local time.  Any failure is reported on
    stderr and otherwise ignored.
    """
    date_text = time.strftime('%d.%m.%Y', time.localtime(episode_ts))

    try:
        tags = mutagen.easyid3.Open(filename)
        fields = (
            ('tracknumber', str(episode_id)),
            ('artist', u'Тоже мне радио'),
            ('title', u'ТСН №%u от %s' % (episode_id, date_text)),
        )
        for key, value in fields:
            tags[key] = value
        tags.save()
    except Exception as e:
        sys.stderr.write('Could not write tags: %s\n' % e)


def upload_file_scp(filename, episode_id):
    """Uploads the file as the specified episode using scp.

    The destination is UPLOAD_HOST followed by UPLOAD_PATH with the episode
    number filled in.

    Raises Exception if scp exits with a non-zero status.
    """
    target = UPLOAD_HOST + ':' + (UPLOAD_PATH % episode_id)
    if not run_command(['scp', '-B', filename, target]):
        raise Exception('Could not upload %s to %s' % (filename, target))


def upload_file_s3(filename, episode_id):
    """Uploads the file as the specified episode using s3cmd.

    The destination is S3_UPLOAD_PATH with the episode number filled in.

    Raises Exception if s3cmd exits with a non-zero status, so that the caller
    does not report a file that was never uploaded.
    """
    target = S3_UPLOAD_PATH % episode_id
    if not run_command(['s3cmd', 'put', '-PMH', filename, target]):
        raise Exception('Could not upload %s to %s' % (filename, target))


def main():
    """Runs the whole pipeline once: clean up, check the prerequisites, find
    and download the recordings, transcode, tag and upload the result."""
    started = time.time()

    check_environment()

    # Get rid of whatever the previous runs left behind.
    for leftover in glob.glob(os.path.join(TEMP_FOLDER, 'tsn_*')):
        unlink(leftover)

    check_noise_profile()
    check_binaries()

    episode_id = find_episode_id()
    if episode_id is None:
        raise Exception("Could not guess episode number.")
    print('Working on episode %u ...' % episode_id)

    filenames = download_files(find_urls())
    if not filenames:
        raise Exception('No source files found.')

    output = transcode_files(filenames)
    # tsn_episode_date was set by find_urls() above.
    write_file_metadata(output, episode_id, tsn_episode_date)
    upload_file_s3(output, episode_id)

    elapsed_minutes = int((time.time() - started) / 60)
    print('It all took %u minutes.\n' % elapsed_minutes)
    print('File URL:  %s' % (DOWNLOAD_URL % episode_id))
    print('File size: %u byte(s)' % os.stat(output).st_size)


if __name__ == '__main__':
    # Run the pipeline until it succeeds, at most max_attempts times, waiting
    # a minute between the attempts.  Exit status: 0 on success or when
    # interrupted from the keyboard, 1 when every attempt has failed.
    max_attempts = 100
    try:
        for attempt in range(1, max_attempts + 1):
            try:
                main()
                sys.exit(0)
            except Exception as e:
                # format_exc() reports the exception being handled, it does
                # not take the exception as an argument.
                sys.stderr.write('%s %s\n' % (e, traceback.format_exc()))
            if attempt < max_attempts:
                print('Retrying in 60 seconds.')
                time.sleep(60)
    except KeyboardInterrupt:
        sys.exit(0)
    sys.exit(1)
