#!/usr/bin/env python
# ###################################################
# pdf2mp3.py - little script/program to convert a
# pdf file or ASCII file (.dat, .txt) into an mp3 or wav audio file
#
# Copyright (C) 2010 Hannes Rennau
# hannes@bolding-burchard.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# ###################################################

# LIST OF PACKAGES NEEDED:
# you need to install the following packages:
# sudo apt-get install python poppler-utils festival festvox-rablpc16k
# lame espeak wavbreaker 

# HOW TO USE:
# 1.create a file with the name pdf2mp3 and copy the whole content of
#   this script into it
# 2.make the file an executable via:
#   >>> chmod +x pdf2mp3
# 3.copy file to /usr/bin to make usage of program possible from everywhere
# on your computer:  >>> sudo cp pdf2mp3 /usr/bin/
# 4.after that get help calling: 
#   pdf2mp3 -h
#  
# 5.EXAMPLE:
#   you want to convert yourfilename.pdf into a mp3 file, then just type:
#   pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3   
#   (for the english voice 'en', for german voice 'de',
#   type: espeak --voices to get list of voices available on your system)
#
#
# edited by busfahrer 24.07.2010

import os,sys
import string
import time
import glob
from subprocess import call, Popen, PIPE
from optparse import OptionParser as op

def main():
    """Parse the command line and run the text-to-speech conversion.

    Reads the voice (-v), input file (-f) and output file (-o) options.
    A pdf input is converted to text first; the text is then synthesized
    to wav and, when the output name ends in '.mp3', encoded to mp3.

    Returns the process exit status:
      0 -- the conversion steps were run
      1 -- the input file does not exist, or no text file was produced
           from the pdf
      2 -- command line usage error (missing option or unsupported
           file extension)
    """
    # Adjacent string literals are used instead of backslash continuations
    # inside the literals, which would embed the source indentation in the
    # text shown to the user.
    parser = op(
        usage='%prog -v [de,en,...] -f filename[.pdf|.txt|.dat] '
              '-o filename[.wav|.mp3]',
        description='This script convertes ASCII files (basically those '
                    'files with extension .txt or .dat) or pdf files into '
                    'an mp3 (or wav) audio file.',
        version=r'$v0.1$')

    parser.add_option('-v', '--voice', type='string', metavar='VOICENAME',
                      help='name of the voice to be used. '
                           'type: ***espeak --voices*** to get list of '
                           'available voices on your system.')
    parser.add_option('-f', '--file', type='string',
                      metavar='SOURCEFILENAME',
                      help='input path of file to read '
                           '(and late on convert to audio file).This can '
                           'be a pdf or ascii (.txt or .dat) file. '
                           'extension must be given!')
    parser.add_option('-o', '--output', type='string',
                      metavar='OUTPUTFILENAME',
                      help='Output filename (with extension .wav or .mp3 '
                           'that the program knows which audio format you '
                           'want.)')

    options, _args = parser.parse_args()

    if options.voice is None:
        print('no voice name given, use -v voicename '
              '[type ***espeak --voices*** for list of available voices]')
        return 2

    if options.file is None:
        print('no input file name given [please use: -f filename]')
        return 2

    if options.output is None:
        print('no output file name given '
              '[please use: -o outputfilename.[wav|mp3]')
        return 2

    filename_inp = str(options.file)
    filename_out = str(options.output)

    # Each problem is reported once and ends the run with a non-zero
    # status, so a bad output extension is no longer also reported as a
    # missing input file.
    if not filename_out.endswith(('.wav', '.mp3')):
        print('please decide whether you want wav or mp3 format by '
              'typing -o filename.wav or -o filename.mp3')
        return 2

    if not os.path.isfile(filename_inp):
        print('*** input file %s does not exist ***' % filename_inp)
        return 1

    if not filename_inp.endswith(('.dat', '.txt', '.pdf')):
        print('*** input file does not have extension (.txt, .dat, .pdf) ***')
        return 2

    if filename_inp.endswith('.pdf'):
        print('converting pdf file: ' + filename_inp + ' to ASCII')
        pdf_convert_to_ascii(filename_inp)
        text_file = filename_inp[:-4] + '.txt'
        if not os.path.isfile(text_file):
            print('*** could not convert %s to ASCII ***' % filename_inp)
            return 1
    else:
        text_file = filename_inp

    # The wav file is always produced first; mp3 is an extra encoding step.
    convert_to_wav(text_file, filename_out, options.voice)
    join_wav_files(filename_out)
    if filename_out.endswith('.mp3'):
        convert_wav_2_mp3(filename_out)
    return 0
 
def pdf_convert_to_ascii(input_pdf_file):
    """Extract the text of a pdf file by running pdftotext.

    input_pdf_file -- path of the pdf file; the output path is this name
                      with its last four characters (expected to be
                      '.pdf') replaced by '.txt'

    Returns the exit status of pdftotext so that callers can detect a
    failed conversion.
    """
    text_file = input_pdf_file[:-4] + '.txt'
    return call(['pdftotext', input_pdf_file, text_file])

def convert_to_wav(input_ascii_file, output_wav_file, language):
    """Synthesize a text file to wav by piping it through sed into espeak.

    The pipeline is ``cat FILE | sed ... | espeak -v VOICE -w OUT.wav``.
    The sed expression deletes every character other than ASCII letters,
    spaces and the punctuation ``.,!?`` before the text reaches espeak.

    input_ascii_file -- path of the text file to read
    output_wav_file  -- output name; its last four characters (the
                        extension) are replaced by '.wav'
    language         -- voice name passed to ``espeak -v``

    Blocks until all three processes have finished, so the wav file is
    complete when this function returns.  Returns the exit status of
    espeak.
    """
    wav_path = output_wav_file[:-4] + '.wav'

    cat_proc = Popen(['cat', input_ascii_file], stdout=PIPE)
    sed_proc = Popen(['sed', 's/[^a-zA-Z .,!?]//g'], stdin=cat_proc.stdout,
                     stdout=PIPE)
    # Close the parent's copy of each pipe once the consumer has it, so an
    # upstream process receives SIGPIPE if its consumer exits early.
    cat_proc.stdout.close()
    espeak_proc = Popen(['espeak', '-v', language, '-w', wav_path],
                        stdin=sed_proc.stdout)
    sed_proc.stdout.close()

    # Wait for espeak itself rather than sleeping a fixed time: synthesis
    # of a longer text takes longer than any constant delay.
    status = espeak_proc.wait()
    sed_proc.wait()
    cat_proc.wait()
    return status

def convert_wav_2_mp3(input_wav_file):
    """Encode a wav file to mp3 with lame and delete the wav on success.

    input_wav_file -- file name whose last four characters (the
                      extension) are replaced to derive both the lame
                      input '<base>.wav' and the lame output '<base>.mp3'

    Returns the exit status of lame.  The wav file is removed only when
    lame reported success, so a failed encode does not destroy the audio.
    """
    base = input_wav_file[:-4]
    wav_path = base + '.wav'
    status = call(['lame', '-f', wav_path, base + '.mp3'])
    if status == 0:
        os.remove(wav_path)
    return status

# espeak creates multiple wav files; this counts how many there are
def nr_wav_files(valid_path,filename_out):
    """Count the files under *valid_path* that look like wav pieces.

    valid_path   -- directory that is walked recursively
    filename_out -- output file name; its last four characters (the
                    extension) are dropped to form '<base>'

    Returns the number of files whose name contains '<base>.wav_'.
    """
    marker = filename_out[:-4] + '.wav_'
    return sum(1
               for _root, _dirs, names in os.walk(valid_path)
               for name in names
               if marker in name)

def join_wav_files(filename_out):
    """Merge the wav pieces of one output into a single '<base>.wav'.

    filename_out -- output file name; its last four characters (the
                    extension) are dropped to form '<base>'

    Runs ``wavmerge -o merged.wav <base>.wav <base>.wav_01 ...`` in the
    current directory.  When wavmerge succeeds, the '<base>.wav_*' pieces
    are deleted and merged.wav is renamed to '<base>.wav'.  When it
    fails, nothing is deleted or renamed.

    Returns the exit status of wavmerge.

    NOTE(review): with n piece files found, only pieces 01..n-1 are
    passed to wavmerge (unchanged from the previous behavior) - confirm
    this against the numbering espeak uses for its pieces.
    """
    base = filename_out[:-4]
    target = base + '.wav'
    part_prefix = base + '.wav_'

    parts = [target]
    for index in range(1, nr_wav_files('.', filename_out)):
        parts.append('%s%02d' % (part_prefix, index))
    print(' '.join(parts))

    # Pass the file names as an argument list instead of a shell command
    # string, so names with spaces or shell metacharacters are handled
    # literally.
    status = call(['wavmerge', '-o', 'merged.wav'] + parts)
    if status != 0:
        print('*** wavmerge failed with exit status %d ***' % status)
        return status

    # Only remove the pieces that belong to this output, not every
    # '*.wav_*' file that happens to be in the directory.
    piece_name_prefix = os.path.basename(part_prefix)
    for piece in glob.glob('*.wav_*'):
        if piece.startswith(piece_name_prefix):
            os.remove(piece)
    os.rename('merged.wav', target)
    return status

# Run the converter only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())