-
Notifications
You must be signed in to change notification settings - Fork 369
/
AudioToPdfConverter.py
116 lines (104 loc) · 4.9 KB
/
AudioToPdfConverter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import textwrap
import tkinter.messagebox as mb
from tkinter import *

import pyttsx3
from path import Path
from pydub import AudioSegment
from PyPDF4.pdf import PdfFileReader as PDFreader, PdfFileWriter as PDFwriter
from speech_recognition import Recognizer, AudioFile
from speech_recognition import RequestError, UnknownValueError
# Initializing the GUI window
class Window(Tk):
    """Main window offering PDF-to-speech and speech-to-PDF conversion.

    The window shows two buttons.  Each opens a ``Toplevel`` dialog that
    collects file locations and then calls :meth:`speak_text` or
    :meth:`speech_recognition`.
    """

    # Layout of the generated PDF, in PDF points (1/72 inch).
    _PAGE_WIDTH = 612
    _PAGE_HEIGHT = 792
    _MARGIN = 72
    _FONT_SIZE = 12
    _LEADING = 14
    # Courier glyphs are 0.6 em wide, i.e. 7.2pt at 12pt, so 65 columns fit
    # inside the 468pt between the left and right margins.
    _WRAP_COLUMNS = 65

    def __init__(self):
        """Create the fixed-size main window and its two launcher buttons."""
        super().__init__()
        self.title("PDF to Audio and Audio to PDF converter")
        self.geometry('400x250')
        self.resizable(0, 0)
        self.config(bg='Black')

        # The foreground is set explicitly: the window background is black,
        # so a default (usually black) text colour would hide the heading.
        Label(self, text='PDF to Audio and Audio to PDF converter',
              wraplength=400, bg='Black', fg='White',
              font=("Comic Sans MS", 15)).place(x=0, y=0)

        Button(self, text="Convert PDF to Audio",
               font=("Comic Sans MS", 15), bg='cyan',
               command=self.pdf_to_audio, width=25).place(x=40, y=80)

        Button(self, text="Convert Audio to PDF",
               font=("Comic Sans MS", 15), bg='cyan',
               command=self.audio_to_pdf, width=25).place(x=40, y=150)

    def pdf_to_audio(self):
        """Open the dialog that asks for a PDF path and a page to read aloud."""
        pta = Toplevel(self)
        pta.title('Convert PDF to Audio')
        pta.geometry('500x300')
        pta.resizable(0, 0)
        pta.config(bg='cyan')

        Label(pta, text='Convert PDF to Audio', font=('Comic Sans MS', 15),
              bg='cyan').place(relx=0.3, y=0)

        Label(pta, text='Enter the PDF file location (with extension): ',
              bg='cyan', font=("Verdana", 11)).place(x=10, y=60)
        filename = Entry(pta, width=32, font=('Verdana', 11))
        filename.place(x=10, y=90)

        Label(pta,
              text='Enter the page to read from the PDF (only one can be read): ',
              bg='cyan', font=("Verdana", 11)).place(x=10, y=140)
        page = Entry(pta, width=15, font=('Verdana', 11))
        page.place(x=10, y=170)

        # The lambda defers reading the entries until the button is pressed.
        Button(pta, text='Speak the text', font=('Gill Sans MT', 12),
               bg='Snow', width=20,
               command=lambda: self.speak_text(filename.get(), page.get())
               ).place(x=150, y=240)

    def audio_to_pdf(self):
        """Open the dialog that asks for an audio path and a target PDF path."""
        atp = Toplevel(self)
        atp.title('Convert Audio to PDF')
        atp.geometry('675x300')
        atp.resizable(0, 0)
        atp.config(bg='cyan')

        Label(atp, text='Convert Audio to PDF', font=("Comic Sans MS", 15),
              bg='cyan').place(relx=0.36, y=0)

        Label(atp,
              text='Enter the Audio File location that you want to read '
                   '[in .wav or .mp3 extensions only]:',
              bg='cyan', font=('Verdana', 11)).place(x=20, y=60)
        audiofile = Entry(atp, width=58, font=('Verdana', 11))
        audiofile.place(x=20, y=90)

        Label(atp,
              text='Enter the PDF File location that you want to save the '
                   'text in (with extension):',
              bg='cyan', font=('Verdana', 11)).place(x=20, y=140)
        pdffile = Entry(atp, width=58, font=('Verdana', 11))
        pdffile.place(x=20, y=170)

        # The lambda defers reading the entries until the button is pressed.
        Button(atp, text='Create PDF', bg='Snow', font=('Gill Sans MT', 12),
               width=20,
               command=lambda: self.speech_recognition(audiofile.get(),
                                                       pdffile.get())
               ).place(x=247, y=230)

    @staticmethod
    def speak_text(filename, page):
        """Read one page of a PDF aloud.

        Args:
            filename: Location of the PDF file.
            page: 1-based page number, as typed by the user (a string).

        Problems with the input (missing field, non-numeric or out-of-range
        page, unreadable file) are reported in an error dialog and the
        method returns without speaking.
        """
        if not filename or not page:
            mb.showerror('Missing field!', 'Please check your responses, '
                                           'because one of the fields is missing')
            return

        try:
            page_number = int(page)
        except ValueError:
            mb.showerror('Error!', 'The page number must be a whole number')
            return

        try:
            # The text is extracted while the file is still open because the
            # reader works from the open stream.
            with open(filename, 'rb') as pdf_file:
                reader = PDFreader(pdf_file)
                page_count = reader.getNumPages()
                # Without this check page 0 would become index -1 and
                # silently read the last page.
                if not 1 <= page_number <= page_count:
                    mb.showerror('Error!', 'The page number must be between '
                                           f'1 and {page_count}')
                    return
                text = reader.getPage(page_number - 1).extractText()
        except OSError as error:
            mb.showerror('Error!', f'Could not open the PDF file: {error}')
            return

        engine = pyttsx3.init()
        engine.say(text)
        engine.runAndWait()

    @staticmethod
    def _pdf_bytes(text):
        """Return a complete PDF document, as bytes, that displays ``text``.

        The text is wrapped to ``_WRAP_COLUMNS`` columns, set in Courier and
        split over as many pages as needed.  Characters outside cp1252 are
        replaced with ``?``.
        """
        # Wrap each input line on its own so explicit newlines are kept;
        # an empty wrap result stands for a blank line.
        lines = []
        for paragraph in text.splitlines() or ['']:
            lines.extend(textwrap.wrap(paragraph, Window._WRAP_COLUMNS) or [''])

        per_page = (Window._PAGE_HEIGHT - 2 * Window._MARGIN) // Window._LEADING
        pages = [lines[i:i + per_page] for i in range(0, len(lines), per_page)]

        # Objects 1-3 are the catalog, the page tree and the font.  Each page
        # then uses two consecutive numbers: its dictionary and its content.
        page_ids = [4 + 2 * index for index in range(len(pages))]
        kids = ' '.join(f'{number} 0 R' for number in page_ids)
        objects = [
            b'<< /Type /Catalog /Pages 2 0 R >>',
            f'<< /Type /Pages /Kids [{kids}] /Count {len(pages)} >>'
            .encode('ascii'),
            b'<< /Type /Font /Subtype /Type1 /BaseFont /Courier '
            b'/Encoding /WinAnsiEncoding >>',
        ]

        first_baseline = Window._PAGE_HEIGHT - Window._MARGIN - Window._FONT_SIZE
        for page_id, page_lines in zip(page_ids, pages):
            operators = [
                'BT',
                f'/F1 {Window._FONT_SIZE} Tf',
                f'{Window._LEADING} TL',
                f'{Window._MARGIN} {first_baseline} Td',
            ]
            for line in page_lines:
                # Backslash first, so the escapes added for the parentheses
                # are not escaped a second time.
                escaped = (line.replace('\\', '\\\\')
                               .replace('(', '\\(')
                               .replace(')', '\\)'))
                operators.append(f'({escaped}) Tj T*')
            operators.append('ET')
            stream = '\n'.join(operators).encode('cp1252', errors='replace')

            objects.append(
                (f'<< /Type /Page /Parent 2 0 R '
                 f'/MediaBox [0 0 {Window._PAGE_WIDTH} {Window._PAGE_HEIGHT}] '
                 f'/Resources << /Font << /F1 3 0 R >> >> '
                 f'/Contents {page_id + 1} 0 R >>').encode('ascii'))
            objects.append(b'<< /Length %d >>\nstream\n' % len(stream)
                           + stream + b'\nendstream')

        # Serialise the objects, recording each byte offset for the xref table.
        document = bytearray(b'%PDF-1.4\n')
        offsets = []
        for number, body in enumerate(objects, start=1):
            offsets.append(len(document))
            document += b'%d 0 obj\n' % number + body + b'\nendobj\n'

        xref_at = len(document)
        document += b'xref\n0 %d\n' % (len(objects) + 1)
        # Every xref entry must be exactly 20 bytes, hence the trailing " \n".
        document += b'0000000000 65535 f \n'
        for offset in offsets:
            document += b'%010d 00000 n \n' % offset
        document += (b'trailer\n<< /Size %d /Root 1 0 R >>\n'
                     b'startxref\n%d\n%%%%EOF\n'
                     % (len(objects) + 1, xref_at))
        return bytes(document)

    @staticmethod
    def write_text(filename, text):
        """Write ``text`` to ``filename`` as a PDF document.

        Args:
            filename: Location of the PDF to create; an existing file is
                overwritten.
            text: The text to put on the page(s).

        Raises:
            OSError: If the file cannot be opened for writing.
        """
        # 'wb' rather than 'ab': the xref offsets are absolute, so appending
        # a document to an existing file would make them wrong.
        with open(filename, 'wb') as output_file:
            output_file.write(Window._pdf_bytes(text))

    def speech_recognition(self, audio, pdf):
        """Transcribe an audio file and save the transcript as a PDF.

        Args:
            audio: Location of a ``.wav`` or ``.mp3`` file.  An mp3 is first
                converted to a wav file of the same base name in the current
                working directory.
            pdf: Location of the PDF to write the recognised text to.

        Invalid input and recognition failures are reported in an error
        dialog and the method returns without writing anything.
        """
        if not audio or not pdf:
            mb.showerror('Missing field!', 'Please check your responses, '
                                           'because one of the fields is missing')
            return

        # splitext copes with names that have no dot or several dots.
        stem, extension = os.path.splitext(os.path.basename(audio))
        extension = extension.lower()
        if extension not in ('.wav', '.mp3'):
            mb.showerror('Error!', 'The format of the audio file should '
                                   'only be either "wav" or "mp3"!')
            return

        try:
            if extension == '.mp3':
                source_file = f'{stem}.wav'
                converted = AudioSegment.from_file(Path(audio), format='mp3')
                converted.export(source_file, format='wav')
            else:
                # A wav file is passed to the recogniser as it is.
                source_file = audio

            recognizer = Recognizer()
            recognizer.pause_threshold = 5  # value kept from the original code
            with AudioFile(source_file) as source:
                speech = recognizer.record(source)
        except (OSError, ValueError) as error:
            mb.showerror('Error!', f'Could not read the audio file: {error}')
            return

        try:
            text = recognizer.recognize_google(speech)
        except UnknownValueError:
            mb.showerror('Error!', 'The speech in the audio file could not '
                                   'be understood')
            return
        except RequestError as error:
            mb.showerror('Error!', 'The speech recognition service could not '
                                   f'be reached: {error}')
            return

        self.write_text(pdf, text)
# Start the GUI only when this file is run as a script, so that importing the
# module does not open a window and block in the Tk event loop.
if __name__ == '__main__':
    app = Window()
    app.update()
    app.mainloop()