-
Notifications
You must be signed in to change notification settings - Fork 0
/
extented_homework.py
24 lines (20 loc) · 1.09 KB
/
extented_homework.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Defining the new static thresholds for categorizing text length including "very long" category
def categorize_text_length_extended(length):
    """Map a text length (character count) to a named length category.

    The bands are contiguous and each upper bound is inclusive:

    * ``length <= 300``  -> "very short"
    * ``length <= 600``  -> "short"
    * ``length <= 900``  -> "medium"
    * ``length <= 1200`` -> "long"
    * anything larger    -> "very long"

    Args:
        length: Length of the text; normally an integer character count.

    Returns:
        One of "very short", "short", "medium", "long" or "very long".
        A value that compares false against every bound (e.g. NaN) also
        ends up in "very long".
    """
    # Each branch is reached only when the previous upper bound was exceeded,
    # so explicit lower bounds are unnecessary. Omitting them also closes the
    # gaps between bands: a fractional value such as 300.5 is classified as
    # "short" instead of falling through to "very long".
    if length <= 300:
        return "very short"
    if length <= 600:
        return "short"
    if length <= 900:
        return "medium"
    if length <= 1200:
        return "long"
    return "very long"
# Derive a character-count column and its extended length category for both
# samples. The train and test frames get the same two steps, so they share a
# single loop (train first, then test).
for sample_df in (df_train_sample, df_test_sample):
    sample_df['text_length'] = sample_df['text'].apply(len)
    sample_df['length_category'] = sample_df['text_length'].apply(
        categorize_text_length_extended
    )

# Preview the first rows of the test sample, including the two new columns.
preview_columns = ['text', 'text_length', 'length_category', 'label']
df_test_sample[preview_columns].head()