-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtxt2lower.php
More file actions
executable file
·35 lines (27 loc) · 1.18 KB
/
txt2lower.php
File metadata and controls
executable file
·35 lines (27 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
<?php
// txt2lower - "line start to lower".
//
// Reads every plain-text file matched by the glob pattern(s) given on the
// command line, lowercases the capital letter that starts a line unless the
// previous line ended with strong punctuation (. ? ! ...), keeps the capital
// of words that are also seen capitalised in the middle of a sentence
// (treated as proper names, e.g. cast-list names), joins all lines with
// single spaces and writes the result under ../tcpt5i/ (relative to the
// current working directory) using the same base file name.
//
// Usage: php txt2lower.php '<glob pattern>' [more patterns or files...]

/**
 * Passes the result of a preg_* call through, turning PCRE failures into exceptions.
 *
 * preg_replace()/preg_replace_callback() return null and preg_match_all()
 * returns false on failure (typically invalid UTF-8 with the /u modifier).
 * Without this check a failure would silently produce an empty output file.
 *
 * @param mixed $result Return value of a preg_* function.
 * @return mixed The same value, never null or false.
 * @throws RuntimeException When the preg_* call failed.
 */
function txt2lower_checked($result)
{
    if ($result === null || $result === false) {
        throw new RuntimeException(
            'PCRE failure (code ' . preg_last_error() . '); is the input valid UTF-8?'
        );
    }

    return $result;
}

/**
 * Applies the line-start lowercasing to one UTF-8 text and returns it as a single line.
 *
 * @param string $text Raw UTF-8 text.
 * @return string Transformed text with every line break replaced by one space.
 * @throws RuntimeException When a regular expression fails on the input.
 */
function txt2lower_transform($text)
{
    // Collapse every whitespace run containing a line break into one "\n"
    // (trailing spaces, blank lines and leading indentation all disappear).
    $text = txt2lower_checked(preg_replace('/\s*\n\s*/u', "\n", $text));

    // Lowercase the capital that opens a line, unless the previous line ended
    // with strong punctuation. \p{Lu} also covers accented capitals, and the
    // lookbehind leaves the context unconsumed so consecutive short lines are
    // all handled. The very first line has no predecessor and is left alone.
    $text = txt2lower_checked(preg_replace_callback(
        '/(?<=[^.?!…]\n)\p{Lu}/u',
        function ($match) {
            return mb_strtolower($match[0], 'UTF-8');
        },
        $text
    ));

    // Collect capitalised words found in the middle of a sentence: they are
    // assumed to be proper names whose capital must survive at line start too.
    $found = array();
    txt2lower_checked(preg_match_all(
        '/(?<=[^.?!()…] )\p{Lu}[^ \n.,;:?!…]*/u',
        $text,
        $found
    ));

    // Map "name as it looks after the lowering step" => "name to restore".
    // Only the first letter is lowercased in the key because that is all the
    // lowering step changed; the first spelling seen wins on collisions.
    $names = array();
    foreach ($found[0] as $raw) {
        // Drop brackets and slashes that stuck to the word.
        // Original author's open question: should this stripping be an exact match?
        $name = str_replace(array('(', ')', '[', ']', '/'), '', $raw);
        $lowered = mb_strtolower(mb_substr($name, 0, 1, 'UTF-8'), 'UTF-8')
            . mb_substr($name, 1, mb_strlen($name, 'UTF-8'), 'UTF-8');
        if (!isset($names[$lowered])) {
            $names[$lowered] = $name;
        }
    }

    // Put the capital back on line-initial occurrences of those names.
    // (?<=\n) anchors at line start, preg_quote() neutralises metacharacters,
    // (?!\w) stops "marie" from matching inside "mariette", and the callback
    // keeps "$" or "\" in a name from being read as a back-reference.
    foreach ($names as $lowered => $name) {
        $text = txt2lower_checked(preg_replace_callback(
            '/(?<=\n)' . preg_quote((string) $lowered, '/') . '(?!\w)/u',
            function () use ($name) {
                return $name;
            },
            $text
        ));
    }

    // Output is one long line; the original note asks whether the downstream
    // tool ("tt", presumably a tagger - to be confirmed) reacts to line breaks.
    return str_replace("\n", ' ', $text);
}

if (!isset($argv[1])) {
    fwrite(STDERR, "Usage: php txt2lower.php '<glob pattern>' [more patterns or files...]\n");
} else {
    // Accept several arguments so that a pattern already expanded by the
    // shell (php txt2lower.php *.txt) still processes every file.
    $files = array();
    foreach (array_slice($argv, 1) as $pattern) {
        $matched = glob($pattern);
        if ($matched !== false) {
            $files = array_merge($files, $matched);
        }
    }
    $files = array_unique($files);

    if (count($files) === 0) {
        fwrite(STDERR, "txt2lower: no file matches the given pattern(s)\n");
    }

    $failures = 0;
    foreach ($files as $file) {
        if (!is_file($file)) {
            continue; // directories and other non-files matched by the glob
        }

        $text = file_get_contents($file);
        if ($text === false) {
            fwrite(STDERR, "txt2lower: cannot read " . $file . "\n");
            $failures++;
            continue;
        }

        try {
            $text = txt2lower_transform($text);
        } catch (RuntimeException $e) {
            fwrite(STDERR, "txt2lower: " . $file . ": " . $e->getMessage() . "\n");
            $failures++;
            continue;
        }

        if (file_put_contents("../tcpt5i/" . basename($file), $text) === false) {
            fwrite(STDERR, "txt2lower: cannot write ../tcpt5i/" . basename($file) . "\n");
            $failures++;
        }
    }

    if ($failures > 0) {
        exit(1);
    }
}
?>