This repository was archived by the owner on Feb 21, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
75 lines (63 loc) · 2.59 KB
/
app.py
File metadata and controls
75 lines (63 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
import json
import base64
from docx import Document
from io import BytesIO, StringIO
import pdfplumber
# Function to extract text from PDF
def extract_text_from_pdf(file):
with pdfplumber.open(file) as pdf:
text = ''
for page in pdf.pages:
text += page.extract_text()
return text
# Function to extract text from Word
def extract_text_from_docx(file):
docx = Document(file)
text = '\n'.join([paragraph.text for paragraph in docx.paragraphs])
return text
# Function to get the JSON file download link
def get_json_download_link(json_file_name):
with open(json_file_name, 'r') as f:
data = f.read()
b64 = base64.b64encode(data.encode()).decode()
href = f'<a href="data:file/json;base64,{b64}" download="{json_file_name}">Download JSON File</a>'
return href
def main():
# Create text input fields for the title, author, and date
title = st.text_input('Title')
author = st.text_input('Author')
date = st.date_input('Date')
# Create a file uploader for the PDF
uploaded_file = st.file_uploader("Upload File", type=['pdf', 'docx', 'txt'])
# When the user clicks the 'Add Document' button, append their input to a list
documents = []
# Define JSON file name
json_file_name = 'documents.json'
if st.button('Add Document'):
if uploaded_file is not None:
file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
if uploaded_file.type == 'application/pdf':
content = extract_text_from_pdf(uploaded_file)
elif uploaded_file.type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
content = extract_text_from_docx(uploaded_file)
elif uploaded_file.type == 'text/plain':
content = uploaded_file.read().decode()
documents.append({
'title': title,
'content': content,
'metadata': {
'author': author,
'date': date.isoformat() # Convert date to string in 'YYYY-MM-DD' format
}
})
# Save the list of documents to a JSON file immediately after adding
with open(json_file_name, 'w') as f:
json.dump(documents, f)
# Display the list of documents
st.write(documents)
# Add download button for the JSON file
if st.button('Download JSON File'):
st.markdown(get_json_download_link(json_file_name), unsafe_allow_html=True)
if __name__ == '__main__':
main()