-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheml_name_change.py
More file actions
57 lines (42 loc) · 1.49 KB
/
eml_name_change.py
File metadata and controls
57 lines (42 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from random import randint
import concurrent.futures
import tqdm
import os
import re
from fast_mail_parser import parse_email
# Directory tree that is scanned (recursively) for .eml files.
ROOT_DIR = "/Users/dvir/projects/fast_eml_parse/output/carmen"
# Size of the thread pool that performs the renames.
WORKER_THREADS = 2

# Count every .eml file below ROOT_DIR up front so the progress bar can be
# created with a known total.
files_counter = sum(
    1
    for _dirpath, _dirnames, filenames in os.walk(ROOT_DIR)
    for filename in filenames
    if filename.endswith(".eml")
)

# Shared progress bar; it is advanced once per processed file.
pbar = tqdm.tqdm(total=files_counter)
# Everything that is neither a word character nor whitespace is removed from
# the subject before it is used as a file name.
_NON_FILENAME_CHARS = re.compile(r"[^\w\s]")


def _subject_to_stem(subject):
    """Return *subject* reduced to word characters separated by single spaces."""
    cleaned = _NON_FILENAME_CHARS.sub("", subject or "")
    # The pattern keeps anything matching \s, which includes newlines and
    # tabs; str.split() without arguments collapses every whitespace run and
    # drops leading/trailing whitespace.
    return " ".join(cleaned.split())


def _claim_free_path(directory, stem):
    """Atomically reserve an unused ``<stem>.eml`` / ``<stem>_<n>.eml`` path."""
    attempt = 0
    while True:
        name = f"{stem}.eml" if attempt == 0 else f"{stem}_{attempt}.eml"
        candidate = os.path.join(directory, name)
        try:
            # O_CREAT | O_EXCL fails if the name is already taken, so two
            # worker threads can never be handed the same target.
            fd = os.open(candidate, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            attempt += 1
            continue
        os.close(fd)
        return candidate


def rename_eml_file(eml_file_path):
    """Rename an .eml file after the subject of the message it contains.

    The subject is stripped of every character that is not a word character
    or whitespace and whitespace runs are collapsed to a single space. If the
    resulting name is already taken in the file's directory, ``_1``, ``_2``,
    ... is appended to the stem until an unused name is found, so an existing
    file is never overwritten.

    The file is left untouched when the sanitized subject is empty or when
    the file already carries the name derived from its subject.

    The shared progress bar is advanced by one whether or not the rename
    succeeds.

    Args:
        eml_file_path: Path of the .eml file to rename.

    Returns:
        The absolute path of the file after the call.

    Raises:
        OSError: If the file cannot be read or renamed.
    """
    try:
        # errors="replace" keeps one undecodable byte in a message from
        # aborting the whole rename.
        with open(eml_file_path, "r", errors="replace") as f:
            message_payload = f.read()
        mail = parse_email(message_payload)

        current_path = os.path.abspath(eml_file_path)
        stem = _subject_to_stem(mail.subject)
        if not stem:
            # Nothing usable in the subject; renaming would produce the
            # hidden file ".eml".
            return current_path

        directory = os.path.dirname(current_path)
        if current_path == os.path.join(directory, stem + ".eml"):
            # Already named after its subject.
            return current_path

        new_file_path = _claim_free_path(directory, stem)
        try:
            # os.replace overwrites the empty placeholder that
            # _claim_free_path created to reserve the name.
            os.replace(eml_file_path, new_file_path)
        except OSError:
            # Do not leave the empty placeholder behind.
            os.unlink(new_file_path)
            raise
        return new_file_path
    finally:
        pbar.update(1)
# Snapshot the paths before any task starts: the workers rename files inside
# ROOT_DIR, so walking the tree while tasks are already running could pick up
# a file again under its new name.
eml_file_paths = [
    os.path.join(root, name)
    for root, _, names in os.walk(ROOT_DIR)
    for name in names
    if name.endswith(".eml")
]

# Leaving the with-block waits for all submitted tasks and releases the
# worker threads, even if an error occurs while submitting.
with concurrent.futures.ThreadPoolExecutor(
    max_workers=WORKER_THREADS
) as executor:
    futures = {
        executor.submit(rename_eml_file, eml_file_path): eml_file_path
        for eml_file_path in eml_file_paths
    }
    # An exception raised inside a worker is stored on its future and would
    # otherwise be lost; report each failure instead.
    for future in concurrent.futures.as_completed(futures):
        error = future.exception()
        if error is not None:
            # tqdm.write prints without corrupting the progress bar output.
            tqdm.tqdm.write(f"Failed to rename {futures[future]}: {error!r}")

# close the progress bar
pbar.close()