#!/usr/bin/env python
#
###################################################
# pdf2mp3.py - little script/program to convert a
# pdf-file or ascii-file (.dat, .txt) into a mp3 audio or wav file
#
# Copyright (C) 2010 Hannes Rennau
# hannes@bolding-burchard.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
###################################################
# LIST OF PACKAGES NEEDED:
# you need to install the following packages:
# sudo apt-get install python poppler-utils festival festvox-rablpc16k
#                      lame espeak wavbreaker
#
# HOW TO USE:
# 1.create a file with the name pdf2mp3 and copy the content of
#   the whole text in there
# 2.make the file an executable via:
#   >>> chmod +x pdf2mp3
# 3.copy file to /usr/bin to make usage of program possible from everywhere
#   on your computer: >>> sudo cp pdf2mp3 /usr/bin/
# 4.after that get help calling:
#   pdf2mp3 -h
#
# 5.EXAMPLE:
#   you want to convert yourfilename.pdf into a mp3 file, then just type:
#   pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3
#   (for the english voice 'en', for german voice 'de',
#   type: espeak --voices to get list of voices available on your system)
#
#
# edited by busfahrer 24.07.2010

import os,sys
import string
import time
import glob
from subprocess import call, Popen, PIPE
from optparse import OptionParser as op


def main():
    """Parse the command line and run the requested conversion.

    Reads the options from ``sys.argv``: ``-v`` (espeak voice), ``-f``
    (input .pdf/.txt/.dat file) and ``-o`` (output .wav/.mp3 file).

    Returns:
        0 on success, 2 when a required option is missing, 1 when the
        input/output file names are unusable or an external tool failed.
    """
    parser = op(
        usage='%prog -v [de,en,...] -f filename[.pdf|.txt|.dat] '
              '-o [wav|mp3] [optional: --ascii]',
        description='This script convertes ASCII files (basically those '
                    'files with extension .txt or .dat) or pdf files into '
                    'an mp3 (or wav) audio file.',
        version=r'$v0.1$')
    parser.add_option('-v', '--voice', type='string', metavar='VOICENAME',
                      help='name of the voice to be used. '
                           'type: ***espeak --voices*** to get list of '
                           'available voices on your system.')
    parser.add_option('-f', '--file', type='string',
                      metavar='SOURCEFILENAME',
                      help='input path of file to read '
                           '(and late on convert to audio file).This can '
                           'be a pdf or ascii (.txt or .dat) file. '
                           'extension must be given!')
    parser.add_option('-o', '--output', type='string',
                      metavar='OUTPUTFILENAME',
                      help='Output filename (with extension .wav or .mp3 '
                           'that the program knows which audio format you '
                           'want.)')
    options, args = parser.parse_args()

    # All three options are mandatory; a missing one is a usage error (2).
    if options.voice is None:
        print('no voice name given, use -v voicename '
              '[type ***espeak --voices*** for list of available voices]')
        return 2
    if options.file is None:
        print('no input file name given [please use: -f filename]')
        return 2
    if options.output is None:
        print('no output file name given '
              '[please use: -o outputfilename.[wav|mp3]')
        return 2

    source_path = str(options.file)
    target_path = str(options.output)

    # Validate each precondition separately so that every failure gets its
    # own message and a non-zero exit status.
    if not target_path.endswith(('.wav', '.mp3')):
        print('please decide whether you want wav or mp3 format by '
              'typing -o filename.wav or -o filename.mp3')
        return 1
    if not os.path.isfile(source_path):
        print('*** input file %s does not exist ***' % source_path)
        return 1
    if not source_path.endswith(('.dat', '.txt', '.pdf')):
        print('*** input file does not have extension (.txt, .dat, .pdf) ***')
        return 1

    if source_path.endswith('.pdf'):
        print('converting pdf file: ' + source_path + ' to ASCII')
        if pdf_convert_to_ascii(source_path) != 0:
            print('*** pdftotext failed to convert %s ***' % source_path)
            return 1
        text_path = source_path[:-4] + '.txt'
    else:
        text_path = source_path

    # Both output formats go through wav first; mp3 adds an encoding step.
    convert_to_wav(text_path, target_path, options.voice)
    join_wav_files(target_path)
    if target_path.endswith('.mp3'):
        if convert_wav_2_mp3(target_path) != 0:
            print('*** lame failed to encode %s ***' % target_path)
            return 1
    return 0


def pdf_convert_to_ascii(input_pdf_file):
    """Run ``pdftotext`` on *input_pdf_file*.

    The text is written next to the pdf, with the last four characters of
    the name (the extension) replaced by ``.txt``.

    Returns:
        The exit status of ``pdftotext``.
    """
    return call(['pdftotext', input_pdf_file, input_pdf_file[:-4] + '.txt'])


def convert_to_wav(input_ascii_file, output_wav_file, language):
    """Speak *input_ascii_file* into a wav file using ``espeak``.

    The text is first piped through ``sed`` to drop every character that is
    not a letter, a space or one of ``.,!?``.  The wav file name is
    *output_wav_file* with its last four characters replaced by ``.wav``.

    Args:
        input_ascii_file: path of the text file to read.
        output_wav_file: output file name (ending in .wav or .mp3).
        language: espeak voice name passed to ``espeak -v``.
    """
    wav_path = output_wav_file[:-4] + '.wav'
    with open(input_ascii_file, 'rb') as text_file:
        sed_proc = Popen(['sed', 's/[^a-zA-Z .,!?]//g'],
                         stdin=text_file, stdout=PIPE)
        espeak_proc = Popen(['espeak', '-v', language, '-w', wav_path],
                            stdin=sed_proc.stdout)
        # Drop our copy of the pipe so sed gets SIGPIPE if espeak exits early.
        sed_proc.stdout.close()
        # Wait for espeak to finish instead of sleeping a fixed time:
        # the wav file must be complete before it is merged or encoded.
        espeak_proc.wait()
        sed_proc.wait()


def convert_wav_2_mp3(input_wav_file):
    """Encode the wav file belonging to *input_wav_file* to mp3 with ``lame``.

    Both file names are derived from *input_wav_file* by replacing its last
    four characters with ``.wav`` / ``.mp3``.  The wav file is removed only
    when lame succeeded, so a failed encoding does not lose the audio.

    Returns:
        The exit status of ``lame``.
    """
    wav_path = input_wav_file[:-4] + '.wav'
    mp3_path = input_wav_file[:-4] + '.mp3'
    status = call(['lame', '-f', wav_path, mp3_path])
    if status == 0:
        os.remove(wav_path)
    return status


# espeak creates multiple wav, how many is obtained here
def nr_wav_files(valid_path, filename_out):
    """Count the files below *valid_path* whose name contains ``<base>.wav_``.

    ``<base>`` is *filename_out* without its last four characters.  The
    directory tree is walked recursively.
    """
    marker = filename_out[:-4] + '.wav_'
    count = 0
    for root, dirs, files in os.walk(valid_path):
        count += sum(1 for name in files if marker in name)
    return count


def join_wav_files(filename_out):
    """Merge the wav parts of *filename_out* into one wav with ``wavmerge``.

    The inputs are ``<base>.wav`` followed by the numbered parts
    ``<base>.wav_01``, ``<base>.wav_02``, ...  The merged result replaces
    ``<base>.wav`` and the part files in the current directory are removed.
    """
    base_wav = filename_out[:-4] + '.wav'
    # NOTE(review): as in the original code, one part fewer than counted by
    # nr_wav_files() is merged -- confirm against espeak's file naming.
    inputs = [base_wav]
    for index in range(1, nr_wav_files('.', filename_out)):
        inputs.append('%s_%02d' % (base_wav, index))
    print(' '.join(inputs))

    # Argument list instead of a shell string: file names containing spaces
    # or shell metacharacters are passed through untouched.
    call(['wavmerge', '-o', 'merged.wav'] + inputs)
    if not os.path.isfile('merged.wav'):
        # Nothing was merged; keep the inputs instead of deleting them and
        # then crashing on the rename.
        print('*** wavmerge produced no output, keeping %s ***' % base_wav)
        return

    # NOTE(review): removes every '*.wav_*' file in the current directory,
    # not only the parts of this output -- kept from the original.
    for part in glob.glob('*.wav_*'):
        os.remove(part)
    os.rename('merged.wav', base_wav)


if __name__ == '__main__':
    ret = main()
    sys.exit(ret)