From 7867846424b59f1f8bccb956740bbb5ed6512bd8 Mon Sep 17 00:00:00 2001 From: Peter Bailie Date: Tue, 30 Jul 2019 00:22:50 -0400 Subject: [PATCH] [Feature:Logging] Preferred name logging sysadmin script (#16) * Create preferred_name_logging.php Sysadmin tool portion as part of the logging request under submitty/submitty#3273 ("Preferred name is forgotten/removed without any indication why") Initial code entry. Testing and tweaking needed. WIP. * Update preferred_name_logging.php Psql can generate several logfiles that need to be parsed. * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php PR Candidate * Delete preferred_name_logging.php~ Unneeded temp file * Update submitty_student_auto_feed.php Add AUTH(entication) logging to student auto feed for preferred name change tracking. * Create readme.md Instructions on using the preferred name logging tool. * Update preferred_name_logging.php Update to log USER_ID token. --- .../preferred_name_logging.php | 229 ++++++++++++++++++ preferred_name_logging/readme.md | 96 ++++++++ .../submitty_student_auto_feed.php | 1 + 3 files changed, 326 insertions(+) create mode 100755 preferred_name_logging/preferred_name_logging.php create mode 100644 preferred_name_logging/readme.md diff --git a/preferred_name_logging/preferred_name_logging.php b/preferred_name_logging/preferred_name_logging.php new file mode 100755 index 0000000..9c475d2 --- /dev/null +++ b/preferred_name_logging/preferred_name_logging.php @@ -0,0 +1,229 @@ +#!/usr/bin/env php + "America/New_York", + 'postgresql_logfile_path' => "/var/log/postgresql/", + 'submitty_logfile_path' => "/var/log/submitty/", + 'postgresql_logfile' => "postgresql", + 'submitty_logfile' => "submitty_preferred_names", + 'submitty_log_retention' => 7 + ); + + private const CONSTANTS = array( + 'postgresql_log_row_count' => 23, + 'psql_validation_update' => 7, + 'psql_validation_log' => 11, + 'psql_validation_pfn' => 13, + 'psql_data_date' => 8, + 'psql_data_auth' => 19, + 'psql_data_pfn' => 14 + ); + + /** + * Method to invoke to run this program: main::run() + * + * @access public + * @static + */ + public static function run() { + // make sure we are running as root from cli + switch (true) { + case posix_getuid() !== 0: + case PHP_SAPI !== 'cli': + fprintf(STDERR, "This is a command line script. Root required.%s", PHP_EOL); + exit(1); + } + + date_default_timezone_set(self::CONFIG['timezone']); + + self::parse_and_write_logs(); + self::log_retention_and_deletion(); + + exit(0); + } + + /** + * Process method + * + * @access private + * @static + */ + private static function parse_and_write_logs() { + //Check to make sure Submitty log directory path exists. Create it if needed. + if (file_exists(self::CONFIG['submitty_logfile_path']) === false) { + if (mkdir(self::CONFIG['submitty_logfile_path'], 0700) === false) { + self::log("Submitty log folder needed, mkdir failed."); + exit(1); + } + } + + //Prepare submitty preferred name change log file for today. + $submitty_logfile = sprintf("%s%s_%s.log", self::CONFIG['submitty_logfile_path'], self::CONFIG['submitty_logfile'], date("Y-m-d")); + $submitty_fh = fopen($submitty_logfile, 'w'); + if ($submitty_fh === false) { + self::log("Cannot create Submitty logfile."); + exit(1); + } else { + fwrite($submitty_fh, "Log opened." . PHP_EOL); + } + + //There can be multiple psql log files that need to be read. + //But we want the ones dated one day prior (hence subtract 86400 seconds which is one day) + $preg_str = sprintf("~^%s\-%s_\d{6}\.csv$~", self::CONFIG['postgresql_logfile'], preg_quote(date("Y-m-d", time() - 86400))); + $logfiles = preg_grep($preg_str, scandir(self::CONFIG['postgresql_logfile_path'])); + + foreach ($logfiles as $logfile) { + $psql_fh = fopen(self::CONFIG['postgresql_logfile_path'] . $logfile, 'r'); + if ($psql_fh === false) { + self::log("Cannot open {$logfile}."); + continue; + } + + $psql_row = fgetcsv($psql_fh); + $psql_row_num = 1; + while($psql_row !== false) { + //Validation. Any case is true, validation fails and row is ignored. + switch(true) { + case count($psql_row) !== self::CONSTANTS['postgresql_log_row_count']: + self::log(sprintf("NOTICE: PSQL log row %d had %d columns. %d columns expected. Row ignored.%s", $psql_row_num, count($psql_row), self::CONFIG['postgresql_log_row_count'])); + case $psql_row[self::CONSTANTS['psql_validation_update']] !== "UPDATE": + case $psql_row[self::CONSTANTS['psql_validation_log']] !== "LOG": + case $psql_row[self::CONSTANTS['psql_validation_pfn']] !== "PREFERRED_NAME DATA UPDATE": + $psql_row = fgetcsv($psql_fh); + $psql_row_num++; + continue 2; + } + + //Validation successful, process row. + //Trim all values in the row + $psql_row = array_map('trim', $psql_row); + + //Get date token. + $date = $psql_row[self::CONSTANTS['psql_data_date']]; + + //Get "AUTH" token (who logged the change). + $key = array(); + if (preg_match("~/\* AUTH: \"[\w\-]+\" \*/~", $psql_row[19]) === 1) { + $key['start'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], "/* AUTH: ") + 3; + $key['end'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], " */"); + $auth = " | " . substr($psql_row[self::CONSTANTS['psql_data_auth']], $key['start'], $key['end'] - $key['start']); + } else { + $auth = " | AUTH NOT LOGGED "; + //Anything sent to STDERR gets emailed by cron. + fprintf(STDERR, "WARNING: AUTH NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true)); + } + + //Get user_id and preferred name change tokens. + $preferred_name = array(); + $preferred_names_data = explode(" ", $psql_row[self::CONSTANTS['psql_data_pfn']]); + + //user_id token + $key = array_search("USER_ID:", $preferred_names_data); + if ($key !== false) { + $user_id = " | USER_ID: {$preferred_names_data[$key+1]} "; + } else { + $user_id = " | USER_ID NOT LOGGED "; + //Anything sent to STDERR gets emailed by cron. + fprintf(STDERR, "WARNING: USER ID NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true)); + } + + $key = array_search("PREFERRED_FIRSTNAME", $preferred_names_data); + if ($key !== false) { + $preferred_name['first']['old'] = $preferred_names_data[$key+2]; + $preferred_name['first']['new'] = $preferred_names_data[$key+4]; + } + // It is possible that no Preferred Firstname was logged, in which we can ignore an move on. + + $key = array_search("PREFERRED_LASTNAME", $preferred_names_data); + if ($key !== false) { + $preferred_name['last']['old'] = $preferred_names_data[$key+2]; + $preferred_name['last']['new'] = $preferred_names_data[$key+4]; + } + // It is possible that no Preferred Lastname was logged, in which we can ignore an move on. + + //Build preferred name change log entry. + $submitty_log = $date . $auth . $user_id; + if (isset($preferred_name['first'])) { + $submitty_log .= " | PREFERRED_FIRSTNAME OLD: {$preferred_name['first']['old']} NEW: {$preferred_name['first']['new']}"; + } else { + $submitty_log .= " | PREFERRED_FIRSTNAME UNCHANGED"; + } + + if (isset($preferred_name['last'])) { + $submitty_log .= " | PREFERRED_LASTNAME OLD: {$preferred_name['last']['old']} NEW: {$preferred_name['last']['new']}"; + } else { + $submitty_log .= " | PREFERRED_LASTNAME UNCHANGED"; + } + + //Write log entry and go to next row. + fwrite($submitty_fh, $submitty_log . PHP_EOL); + $psql_row = fgetcsv($psql_fh); + $psql_row_num++; + } + + fclose($psql_fh); + } + + fwrite($submitty_fh, "Log closed." . PHP_EOL); + fclose($submitty_fh); + } + + /** + * Automatically remove old logs + * + * @access private + * @static + */ + private static function log_retention_and_deletion() { + $preg_str = sprintf("~^%s_%s.log$~", self::CONFIG['submitty_logfile'], preg_quote(date("m-d-Y"))); + $logfiles = preg_grep($preg_str, scandir(self::CONFIG['submitty_logfile_path'])); + $expiration_epoch = (int)(strtotime(date('Y-m-d')) / 86400) - self::CONFIG['submitty_log_retention']; + + foreach($logfiles as $logfile) { + $datestamp = substr($logfile, strpos($logfile, "_") + 1, 10); + $datestamp_epoch = (int)(strtotime($datestamp) / 86400); + if ($datestamp_epoch < $expiration_epoch) { + if (unlink(self::CONFIG['submitty_logfile_path'] . $logfile) === false) { + self::log("Could not delete old logfile: {$logfile}"); + } + } + } + } + + /** + * Log messages to error log and STDERR. + * + * @access private + * @static + */ + private static function log(string $msg) { + $datestamp = date("m-d-Y"); + error_log(sprintf("%s %s", $datestamp, $msg), 0); + fprintf(STDERR, "%s%s", $msg, PHP_EOL); + } +} + +// Start processing. +main::run(); + +// EOF +?> diff --git a/preferred_name_logging/readme.md b/preferred_name_logging/readme.md new file mode 100644 index 0000000..9e0f16f --- /dev/null +++ b/preferred_name_logging/readme.md @@ -0,0 +1,96 @@ +# Preferred Name Logging + +This tool will help track when user preferred names are changed. It attempts to +log who was authenticated for the change, and what change occurred. + +It works by first having Postgresql log the required information as the data is +updated in Submitty's databases. Then the sysadmin tool in this folder will +scrape the Postgresql logfile and record only those entries that showed +preferred name change. + +## FERPA + +Data processed and logged by this tool may be protected by +[FERPA (20 U.S.C. ยง 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html). +Please consult and abide by your institute's data protection policies. + +## Postgresql + +Submitty's website code and database schema will instruct Postgresql to log any +`UPDATE` query involving any user's preferred first or last name changes. + +### Postgresql Configuration + +Postgresql needs to be configured to produce the required logs as the necessary +logging is not enabled by default. + +#### Config file + +- Config file is `Postgresql.conf` + - In Ubuntu 18.04, it is located in `/etc/postgresql/10/main/postgresql.conf` + +#### Config options + +You will find these options under "ERROR REPORTING AND LOGGING". Please enable +(remove the '#' symbol preceding the line) and set these values: + +- `log_destination = 'csvlog'` + - The sysadmin tool will scrape a CSV file. +- `logging_collector = on` + - Postgresql doesn't write logs without. this. +- `log_directory = '/var/log/postgresql'` + - You can use a different folder, if needed. +- `log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'` + - You can change the filename a little, but it **must** end with `-%Y-%m-%d_%H%M%S.log` +- `log_file_mode = 0600` + - Limits access to this logfile. +- `log_rotation_age = 2d` + - At least 2 days of logs will be needed since the sysadmin tool is intended + to run the following day. +- `log_rotation_size = 10MB` + - This doesn't need to be changed. Any additional logfiles created for a + single day will be picked up by the sysadmin tool. +- `log_min_messages = log` + - Preferred name changes are logged at the `log` level. You can set any + higher level of detail, but not lower than `log`. +- `log_min_duration_statement = 0` + - We want all log instances regardless of process time. +- `log_line_prefix = '%m [%p] %q%u@%d '` + - This can be changed so long as the very first detail is a timestamp. The + sysadmin tool expects to find a timestamp at the very first column. + +## Sysadmin Tool + +The sysadmin tool needs to be run on a machine with local file access to the +Postgresql log file. It is written in PHP. + +### Sysdamin Tool Config + +The configuration is defined as a class constant. Near the top of the code is a +small block starting as `private const CONFIG = array(`. Inside the block will +be a config element in single quotes, an arrow like `=>`, a value possibly +enclosed in double quotes, followed by a comma. (Do not lose the comma!) + +- `'timezone' => "America/New_York",` + - Set this to your local timezone. q.v. + [https://www.php.net/manual/en/timezones.php](https://www.php.net/manual/en/timezones.php) + for more information. +- `'postgresql_logfile_path' => "/var/log/postgresql/",` + - Set this to the same setting as `log_directory` in `postgresql.conf`. +- `'submitty_logfile_path' => "/var/log/submitty/",` + - Where the sysadmin tool will write the preferred name logfile. +- `'postgresql_logfile' => "postgresql",` + - The name of the logfile created by Postgresql. Do not include the time + stamp. This only needs to be changed when `log_filename` in + `postgresql.conf` is changed. +- `'submitty_logfile' => "submitty_preferred_names",` + - Name of the preferred name change logfile. You can leave this as is. +- `'submitty_log_retention' => 7` + - How many days of preferred name change logs to keep. + +### Running the Sysadmin Tool + +This tool is meant to be executed on the command line and can be scheduled as a +cron job. Errors will be outputted to `STDERR`, which in a crontab can either +be redirected to `/dev/null` or emailed to a sysadmin. Running as `root` is +required, and there are no command line arguments. diff --git a/student_auto_feed/submitty_student_auto_feed.php b/student_auto_feed/submitty_student_auto_feed.php index be8d9a1..39eac9c 100755 --- a/student_auto_feed/submitty_student_auto_feed.php +++ b/student_auto_feed/submitty_student_auto_feed.php @@ -534,6 +534,7 @@ private function upsert_psql() { user_email=upsert_users.user_email FROM upsert_users WHERE users.user_id=upsert_users.user_id +/* AUTH: "AUTO_FEED" */ SQL; $sql['registration_section']['update'] = null;