-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFY_converter.py
127 lines (116 loc) · 4.6 KB
/
FY_converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import streamlit as st
import pandas as pd
import requests, json
import time
import base64
# Clear every st.experimental_memo cache at the start of each script run, so
# the memoized functions below are recomputed on every run.
st.experimental_memo.clear()
# Page configuration and on-page heading.
st.set_page_config(page_title="FY Converter")
st.title("Pubs: Convert Calendar Year to Fiscal Year")
# NOTE(review): `headers` is not referenced by any code visible in this file;
# presumably it was meant to be sent with the Crossref requests, and the
# address looks redacted -- confirm before relying on it.
headers = {'Mailto':'[email protected]'}
# Module-level accumulator: api_loop appends one [DOI, title, pub_date, FY]
# row to this list for every row of the uploaded table.
results_list = []
# CSV export helper (wrapped in st.experimental_memo)
@st.experimental_memo(suppress_st_warning=True)
def convert_df_to_csv(df):
    """Return ``df`` rendered as CSV text (index omitted), encoded as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    encoded = csv_text.encode('utf-8')
    return encoded
#main function that uses list of DOIs with API call

# Upper bound (seconds) for one Crossref request, so a stalled connection
# cannot hang the whole batch.
_CROSSREF_TIMEOUT_SECONDS = 30


def _date_part(parts, index, placeholder):
    """Return ``parts[index]``, or ``placeholder`` when that element is unavailable."""
    try:
        return parts[index]
    except (IndexError, KeyError, TypeError):
        return placeholder


def _parse_crossref_work(work_json):
    """Extract ``(title, pub_date, FY)`` from a decoded Crossref work response.

    ``title`` falls back to ``'No Article Title Found'``. ``pub_date`` is
    ``'<year>-<month>-<day>'`` with ``XXXX``/``XX`` placeholders for missing
    parts. ``FY`` is ``year + 1`` when the month is October or later, the year
    itself for earlier months, ``'NA'`` when no month is available, and
    ``'No published date found'`` when the month or year is not numeric.
    """
    try:
        title = work_json['message']['title'][0]
    except (KeyError, IndexError, TypeError):
        title = 'No Article Title Found'

    try:
        parts = work_json['message']['published']['date-parts'][0]
    except (KeyError, IndexError, TypeError):
        parts = []

    year = _date_part(parts, 0, 'XXXX')
    month = _date_part(parts, 1, 'XX')
    day = _date_part(parts, 2, 'XX')
    pub_date = f'{year}-{month}-{day}'

    try:
        if month == 'XX':
            fiscal_year = 'NA'
        elif int(month) >= 10:
            # October-December are counted in the following fiscal year.
            fiscal_year = int(year) + 1
        else:
            fiscal_year = year
    except (TypeError, ValueError, OverflowError):
        fiscal_year = 'No published date found'

    return title, pub_date, fiscal_year


@st.experimental_memo(suppress_st_warning=True)
def api_loop(dataframe):
    """Look up every DOI in ``dataframe`` on Crossref and display the results.

    Args:
        dataframe: table with a ``DOI`` column; one Crossref request is made
            per row that holds a non-empty string DOI.

    Side effects:
        * appends one ``[DOI, title, pub_date, FY]`` row per input row to the
          module-level ``results_list`` (an all-empty row when the DOI is
          missing or not a string, the request fails, or the response is not
          JSON);
        * advances the module-level progress bar ``my_bar``;
        * stores the de-duplicated result table in the global ``dates_df``;
        * renders that table and a CSV download link on the page.

    Returns:
        None.
    """
    global dates_df

    total_rows = len(dataframe)
    for position, (_, record) in enumerate(dataframe.iterrows(), start=1):
        fraction_done = position / total_rows

        # A missing DOI column or a non-string cell (e.g. NaN) yields no DOI.
        try:
            doi = str(record['DOI'].replace(' ', ''))
        except (KeyError, AttributeError, TypeError):
            doi = ''

        if not doi:
            # Nothing to look up: record a blank row without hitting the API.
            results_list.append(['', '', '', ''])
            my_bar.progress(fraction_done)
            continue

        try:
            # NOTE(review): the trailing literal looks like a redacted
            # "mailto" query parameter; kept byte-for-byte -- confirm the
            # intended value against the original source.
            response = requests.get(
                'https://api.crossref.org/works/'+doi+'[email protected]',
                timeout=_CROSSREF_TIMEOUT_SECONDS,
            )
            work_json = json.loads(response.text)
        except (requests.RequestException, ValueError):
            # Network failure, timeout, or a non-JSON body: record a blank
            # row and keep going instead of aborting the whole batch.
            results_list.append(['', '', '', ''])
            my_bar.progress(fraction_done)
            continue

        title, pub_date, fiscal_year = _parse_crossref_work(work_json)
        results_list.append([doi, title, pub_date, fiscal_year])
        my_bar.progress(fraction_done)
        # Short pause between successful lookups to go easy on the API.
        time.sleep(0.05)

    dates_df = pd.DataFrame(results_list, columns=['DOI', 'title', 'pub_date', 'FY'])
    dates_df = dates_df.reset_index(drop=True)
    # FY mixes ints and strings; normalise both text columns to str.
    dates_df['FY'] = dates_df['FY'].astype(str)
    dates_df['pub_date'] = dates_df['pub_date'].astype(str)
    dates_df = dates_df.drop_duplicates()

    #display final dataframe
    st.dataframe(dates_df)
    st.markdown(get_table_download_link(dates_df), unsafe_allow_html=True)
@st.experimental_memo(suppress_st_warning=True)
def get_table_download_link(df):
    """Return an HTML ``<a>`` tag that downloads ``df`` as a CSV file.

    The frame is written as CSV without its index, base64-encoded, and
    embedded in a ``data:`` URI, so the link carries the file contents itself.
    """
    # base64 works on bytes, so encode the CSV text first and decode the
    # encoded result back to str for embedding in the markup.
    csv_bytes = df.to_csv(index=False).encode()
    payload = base64.b64encode(csv_bytes).decode()
    href = f'data:file/csv;base64,{payload}'
    return f'<a href="{href}" download="myfilename.csv">Download csv file</a>'
# Upload form. clear_on_submit=True resets the uploader after each submission.
with st.form("my-form", clear_on_submit=True):
    data = st.file_uploader('Upload data. Make sure you have a column labeled "DOI". The standard RES output format is acceptable',
                            key = '1',
                            help='This widget accepts both CSV and XLSX files. The standard RES output format is acceptable.')
    submitted = st.form_submit_button("Start the Process")

if submitted and data is None:
    # Give feedback instead of silently doing nothing.
    st.warning('Please upload a CSV or XLSX file before starting the process.')
elif submitted:
    # Choose the pandas reader from the file extension (case-insensitive).
    uploaded_name = data.name.lower()
    if uploaded_name.endswith('.csv'):
        read_table = pd.read_csv
    elif uploaded_name.endswith('.xlsx'):
        read_table = pd.read_excel
    else:
        read_table = None

    if read_table is None:
        st.error('Unsupported file type. Please upload a CSV or XLSX file.')
    else:
        st.write("Your Data:")
        # `df` and `my_bar` are deliberately module-level names: api_loop
        # reads the progress bar (and, in its original form, `df`) as globals.
        df = read_table(data, header=[0])
        #display dataframe of uploaded DOIs
        st.dataframe(df)
        #introduce streamlit progress bar widget
        my_bar = st.progress(0.0)
        api_loop(df)
        st.balloons()
        st.success('Your Download is Ready!')