-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature:Logging] Preferred name logging sysadmin script (#16)
* Create preferred_name_logging.php Sysadmin tool portion as part of the logging request under Submitty/Submitty#3273 ("Preferred name is forgotten/removed without any indication why") Initial code entry. Testing and tweaking needed. WIP. * Update preferred_name_logging.php Psql can generate several logfiles that need to be parsed. * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php PR Candidate * Delete preferred_name_logging.php~ Unneeded temp file * Update submitty_student_auto_feed.php Add AUTH(entication) logging to student auto feed for preferred name change tracking. * Create readme.md Instructions on using the preferred name logging tool. * Update preferred_name_logging.php Update to log USER_ID token.
- Loading branch information
Showing
3 changed files
with
326 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,229 @@ | ||
#!/usr/bin/env php | ||
<?php | ||
/** | ||
* Preferred Name Logging syadmin tool for Submitty | ||
* | ||
* This script is to be run on the same server as Postgresql. When run, it | ||
* will parse Postgresql's CSV logs for the previous day and compile a new | ||
* log of any changes to a user's preferred first and last names. | ||
* | ||
* @author Peter Bailie | ||
*/ | ||
|
||
/** Main class */ | ||
class main { | ||
|
||
/** | ||
* Config | ||
* | ||
* @var const array | ||
* @access private | ||
*/ | ||
private const CONFIG = array( | ||
'timezone' => "America/New_York", | ||
'postgresql_logfile_path' => "/var/log/postgresql/", | ||
'submitty_logfile_path' => "/var/log/submitty/", | ||
'postgresql_logfile' => "postgresql", | ||
'submitty_logfile' => "submitty_preferred_names", | ||
'submitty_log_retention' => 7 | ||
); | ||
|
||
private const CONSTANTS = array( | ||
'postgresql_log_row_count' => 23, | ||
'psql_validation_update' => 7, | ||
'psql_validation_log' => 11, | ||
'psql_validation_pfn' => 13, | ||
'psql_data_date' => 8, | ||
'psql_data_auth' => 19, | ||
'psql_data_pfn' => 14 | ||
); | ||
|
||
/** | ||
* Method to invoke to run this program: main::run() | ||
* | ||
* @access public | ||
* @static | ||
*/ | ||
public static function run() { | ||
// make sure we are running as root from cli | ||
switch (true) { | ||
case posix_getuid() !== 0: | ||
case PHP_SAPI !== 'cli': | ||
fprintf(STDERR, "This is a command line script. Root required.%s", PHP_EOL); | ||
exit(1); | ||
} | ||
|
||
date_default_timezone_set(self::CONFIG['timezone']); | ||
|
||
self::parse_and_write_logs(); | ||
self::log_retention_and_deletion(); | ||
|
||
exit(0); | ||
} | ||
|
||
/** | ||
* Process method | ||
* | ||
* @access private | ||
* @static | ||
*/ | ||
private static function parse_and_write_logs() { | ||
//Check to make sure Submitty log directory path exists. Create it if needed. | ||
if (file_exists(self::CONFIG['submitty_logfile_path']) === false) { | ||
if (mkdir(self::CONFIG['submitty_logfile_path'], 0700) === false) { | ||
self::log("Submitty log folder needed, mkdir failed."); | ||
exit(1); | ||
} | ||
} | ||
|
||
//Prepare submitty preferred name change log file for today. | ||
$submitty_logfile = sprintf("%s%s_%s.log", self::CONFIG['submitty_logfile_path'], self::CONFIG['submitty_logfile'], date("Y-m-d")); | ||
$submitty_fh = fopen($submitty_logfile, 'w'); | ||
if ($submitty_fh === false) { | ||
self::log("Cannot create Submitty logfile."); | ||
exit(1); | ||
} else { | ||
fwrite($submitty_fh, "Log opened." . PHP_EOL); | ||
} | ||
|
||
//There can be multiple psql log files that need to be read. | ||
//But we want the ones dated one day prior (hence subtract 86400 seconds which is one day) | ||
$preg_str = sprintf("~^%s\-%s_\d{6}\.csv$~", self::CONFIG['postgresql_logfile'], preg_quote(date("Y-m-d", time() - 86400))); | ||
$logfiles = preg_grep($preg_str, scandir(self::CONFIG['postgresql_logfile_path'])); | ||
|
||
foreach ($logfiles as $logfile) { | ||
$psql_fh = fopen(self::CONFIG['postgresql_logfile_path'] . $logfile, 'r'); | ||
if ($psql_fh === false) { | ||
self::log("Cannot open {$logfile}."); | ||
continue; | ||
} | ||
|
||
$psql_row = fgetcsv($psql_fh); | ||
$psql_row_num = 1; | ||
while($psql_row !== false) { | ||
//Validation. Any case is true, validation fails and row is ignored. | ||
switch(true) { | ||
case count($psql_row) !== self::CONSTANTS['postgresql_log_row_count']: | ||
self::log(sprintf("NOTICE: PSQL log row %d had %d columns. %d columns expected. Row ignored.%s", $psql_row_num, count($psql_row), self::CONFIG['postgresql_log_row_count'])); | ||
case $psql_row[self::CONSTANTS['psql_validation_update']] !== "UPDATE": | ||
case $psql_row[self::CONSTANTS['psql_validation_log']] !== "LOG": | ||
case $psql_row[self::CONSTANTS['psql_validation_pfn']] !== "PREFERRED_NAME DATA UPDATE": | ||
$psql_row = fgetcsv($psql_fh); | ||
$psql_row_num++; | ||
continue 2; | ||
} | ||
|
||
//Validation successful, process row. | ||
//Trim all values in the row | ||
$psql_row = array_map('trim', $psql_row); | ||
|
||
//Get date token. | ||
$date = $psql_row[self::CONSTANTS['psql_data_date']]; | ||
|
||
//Get "AUTH" token (who logged the change). | ||
$key = array(); | ||
if (preg_match("~/\* AUTH: \"[\w\-]+\" \*/~", $psql_row[19]) === 1) { | ||
$key['start'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], "/* AUTH: ") + 3; | ||
$key['end'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], " */"); | ||
$auth = " | " . substr($psql_row[self::CONSTANTS['psql_data_auth']], $key['start'], $key['end'] - $key['start']); | ||
} else { | ||
$auth = " | AUTH NOT LOGGED "; | ||
//Anything sent to STDERR gets emailed by cron. | ||
fprintf(STDERR, "WARNING: AUTH NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true)); | ||
} | ||
|
||
//Get user_id and preferred name change tokens. | ||
$preferred_name = array(); | ||
$preferred_names_data = explode(" ", $psql_row[self::CONSTANTS['psql_data_pfn']]); | ||
|
||
//user_id token | ||
$key = array_search("USER_ID:", $preferred_names_data); | ||
if ($key !== false) { | ||
$user_id = " | USER_ID: {$preferred_names_data[$key+1]} "; | ||
} else { | ||
$user_id = " | USER_ID NOT LOGGED "; | ||
//Anything sent to STDERR gets emailed by cron. | ||
fprintf(STDERR, "WARNING: USER ID NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true)); | ||
} | ||
|
||
$key = array_search("PREFERRED_FIRSTNAME", $preferred_names_data); | ||
if ($key !== false) { | ||
$preferred_name['first']['old'] = $preferred_names_data[$key+2]; | ||
$preferred_name['first']['new'] = $preferred_names_data[$key+4]; | ||
} | ||
// It is possible that no Preferred Firstname was logged, in which we can ignore an move on. | ||
|
||
$key = array_search("PREFERRED_LASTNAME", $preferred_names_data); | ||
if ($key !== false) { | ||
$preferred_name['last']['old'] = $preferred_names_data[$key+2]; | ||
$preferred_name['last']['new'] = $preferred_names_data[$key+4]; | ||
} | ||
// It is possible that no Preferred Lastname was logged, in which we can ignore an move on. | ||
|
||
//Build preferred name change log entry. | ||
$submitty_log = $date . $auth . $user_id; | ||
if (isset($preferred_name['first'])) { | ||
$submitty_log .= " | PREFERRED_FIRSTNAME OLD: {$preferred_name['first']['old']} NEW: {$preferred_name['first']['new']}"; | ||
} else { | ||
$submitty_log .= " | PREFERRED_FIRSTNAME UNCHANGED"; | ||
} | ||
|
||
if (isset($preferred_name['last'])) { | ||
$submitty_log .= " | PREFERRED_LASTNAME OLD: {$preferred_name['last']['old']} NEW: {$preferred_name['last']['new']}"; | ||
} else { | ||
$submitty_log .= " | PREFERRED_LASTNAME UNCHANGED"; | ||
} | ||
|
||
//Write log entry and go to next row. | ||
fwrite($submitty_fh, $submitty_log . PHP_EOL); | ||
$psql_row = fgetcsv($psql_fh); | ||
$psql_row_num++; | ||
} | ||
|
||
fclose($psql_fh); | ||
} | ||
|
||
fwrite($submitty_fh, "Log closed." . PHP_EOL); | ||
fclose($submitty_fh); | ||
} | ||
|
||
/** | ||
* Automatically remove old logs | ||
* | ||
* @access private | ||
* @static | ||
*/ | ||
private static function log_retention_and_deletion() { | ||
$preg_str = sprintf("~^%s_%s.log$~", self::CONFIG['submitty_logfile'], preg_quote(date("m-d-Y"))); | ||
$logfiles = preg_grep($preg_str, scandir(self::CONFIG['submitty_logfile_path'])); | ||
$expiration_epoch = (int)(strtotime(date('Y-m-d')) / 86400) - self::CONFIG['submitty_log_retention']; | ||
|
||
foreach($logfiles as $logfile) { | ||
$datestamp = substr($logfile, strpos($logfile, "_") + 1, 10); | ||
$datestamp_epoch = (int)(strtotime($datestamp) / 86400); | ||
if ($datestamp_epoch < $expiration_epoch) { | ||
if (unlink(self::CONFIG['submitty_logfile_path'] . $logfile) === false) { | ||
self::log("Could not delete old logfile: {$logfile}"); | ||
} | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Log messages to error log and STDERR. | ||
* | ||
* @access private | ||
* @static | ||
*/ | ||
private static function log(string $msg) { | ||
$datestamp = date("m-d-Y"); | ||
error_log(sprintf("%s %s", $datestamp, $msg), 0); | ||
fprintf(STDERR, "%s%s", $msg, PHP_EOL); | ||
} | ||
} | ||
|
||
// Start processing. | ||
main::run(); | ||
|
||
// EOF | ||
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# Preferred Name Logging | ||
|
||
This tool will help track when user preferred names are changed. It attempts to | ||
log who was authenticated for the change, and what change occurred. | ||
|
||
It works by first having Postgresql log the required information as the data is | ||
updated in Submitty's databases. Then the sysadmin tool in this folder will | ||
scrape the Postgresql logfile and record only those entries that showed | ||
preferred name change. | ||
|
||
## FERPA | ||
|
||
Data processed and logged by this tool may be protected by | ||
[FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html). | ||
Please consult and abide by your institute's data protection policies. | ||
|
||
## Postgresql | ||
|
||
Submitty's website code and database schema will instruct Postgresql to log any | ||
`UPDATE` query involving any user's preferred first or last name changes. | ||
|
||
### Postgresql Configuration | ||
|
||
Postgresql needs to be configured to produce the required logs as the necessary | ||
logging is not enabled by default. | ||
|
||
#### Config file | ||
|
||
- Config file is `Postgresql.conf` | ||
- In Ubuntu 18.04, it is located in `/etc/postgresql/10/main/postgresql.conf` | ||
|
||
#### Config options | ||
|
||
You will find these options under "ERROR REPORTING AND LOGGING". Please enable | ||
(remove the '#' symbol preceding the line) and set these values: | ||
|
||
- `log_destination = 'csvlog'` | ||
- The sysadmin tool will scrape a CSV file. | ||
- `logging_collector = on` | ||
- Postgresql doesn't write logs without. this. | ||
- `log_directory = '/var/log/postgresql'` | ||
- You can use a different folder, if needed. | ||
- `log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'` | ||
- You can change the filename a little, but it **must** end with `-%Y-%m-%d_%H%M%S.log` | ||
- `log_file_mode = 0600` | ||
- Limits access to this logfile. | ||
- `log_rotation_age = 2d` | ||
- At least 2 days of logs will be needed since the sysadmin tool is intended | ||
to run the following day. | ||
- `log_rotation_size = 10MB` | ||
- This doesn't need to be changed. Any additional logfiles created for a | ||
single day will be picked up by the sysadmin tool. | ||
- `log_min_messages = log` | ||
- Preferred name changes are logged at the `log` level. You can set any | ||
higher level of detail, but not lower than `log`. | ||
- `log_min_duration_statement = 0` | ||
- We want all log instances regardless of process time. | ||
- `log_line_prefix = '%m [%p] %q%u@%d '` | ||
- This can be changed so long as the very first detail is a timestamp. The | ||
sysadmin tool expects to find a timestamp at the very first column. | ||
|
||
## Sysadmin Tool | ||
|
||
The sysadmin tool needs to be run on a machine with local file access to the | ||
Postgresql log file. It is written in PHP. | ||
|
||
### Sysdamin Tool Config | ||
|
||
The configuration is defined as a class constant. Near the top of the code is a | ||
small block starting as `private const CONFIG = array(`. Inside the block will | ||
be a config element in single quotes, an arrow like `=>`, a value possibly | ||
enclosed in double quotes, followed by a comma. (Do not lose the comma!) | ||
|
||
- `'timezone' => "America/New_York",` | ||
- Set this to your local timezone. q.v. | ||
[https://www.php.net/manual/en/timezones.php](https://www.php.net/manual/en/timezones.php) | ||
for more information. | ||
- `'postgresql_logfile_path' => "/var/log/postgresql/",` | ||
- Set this to the same setting as `log_directory` in `postgresql.conf`. | ||
- `'submitty_logfile_path' => "/var/log/submitty/",` | ||
- Where the sysadmin tool will write the preferred name logfile. | ||
- `'postgresql_logfile' => "postgresql",` | ||
- The name of the logfile created by Postgresql. Do not include the time | ||
stamp. This only needs to be changed when `log_filename` in | ||
`postgresql.conf` is changed. | ||
- `'submitty_logfile' => "submitty_preferred_names",` | ||
- Name of the preferred name change logfile. You can leave this as is. | ||
- `'submitty_log_retention' => 7` | ||
- How many days of preferred name change logs to keep. | ||
|
||
### Running the Sysadmin Tool | ||
|
||
This tool is meant to be executed on the command line and can be scheduled as a | ||
cron job. Errors will be outputted to `STDERR`, which in a crontab can either | ||
be redirected to `/dev/null` or emailed to a sysadmin. Running as `root` is | ||
required, and there are no command line arguments. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters