From a9d633f40ed397bd62942c217132333038e7ce36 Mon Sep 17 00:00:00 2001 From: Peter Bailie Date: Mon, 6 Jan 2020 17:14:00 -0500 Subject: [PATCH] Preferred name logging update (#19) * Update preferred_name_logging.php Some refactoring and needed adjustments to satisfy request from @bmcutler in PR Submitty/Submitty#4153 * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php Better integration and improved error logging * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update readme.md WIP * Update readme.md WIP * Update readme.md WIP * Update preferred_name_logging.php WIP * Update readme.md and preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update WIP * Update preferred_name_logging.php WIP (correct spacing of log output) --- .../preferred_name_logging.php | 319 +++++++++++++----- preferred_name_logging/readme.md | 155 ++++----- 2 files changed, 300 insertions(+), 174 deletions(-) diff --git a/preferred_name_logging/preferred_name_logging.php b/preferred_name_logging/preferred_name_logging.php index 9c475d2..07eeeb0 100755 --- a/preferred_name_logging/preferred_name_logging.php +++ b/preferred_name_logging/preferred_name_logging.php @@ -14,85 +14,128 @@ class main { /** - * Config + * Operating config determined by main::get_config(). * - * @var const array + * @staticvar array * @access private */ - private const CONFIG = array( - 'timezone' => "America/New_York", - 'postgresql_logfile_path' => "/var/log/postgresql/", - 'submitty_logfile_path' => "/var/log/submitty/", - 'postgresql_logfile' => "postgresql", - 'submitty_logfile' => "submitty_preferred_names", - 'submitty_log_retention' => 7 - ); - - private const CONSTANTS = array( - 'postgresql_log_row_count' => 23, - 'psql_validation_update' => 7, - 'psql_validation_log' => 11, - 'psql_validation_pfn' => 13, - 'psql_data_date' => 8, - 'psql_data_auth' => 19, - 'psql_data_pfn' => 14 - ); + private static $config = array(); + + //Do not change these. + private const POSTGRESQL_LOG_ROW_COUNT = 23; + private const PSQL_VALIDATION_UPDATE = 7; + private const PSQL_VALIDATION_LOG = 11; + private const PSQL_VALIDATION_PFN = 13; + private const PSQL_DATA_DATE = 8; + private const PSQL_DATA_AUTH = 19; + private const PSQL_DATA_PFN = 14; + private const POSTGRESQL_LOGDIR = '/psql/'; + private const POSTGRESQL_LOGFILE = 'postgresql'; + private const PREFERRED_NAMES_LOGDIR = '/preferred_names/'; + private const PREFERRED_NAMES_LOGFILE = 'preferred-names'; + private const ERROR_LOGFILE = 'errors.log'; /** - * Method to invoke to run this program: main::run() + * Main process. Invoke with main::run() * - * @access public * @static + * @access public */ public static function run() { - // make sure we are running as root from cli - switch (true) { - case posix_getuid() !== 0: - case PHP_SAPI !== 'cli': - fprintf(STDERR, "This is a command line script. Root required.%s", PHP_EOL); + //make sure we are running from cli + if (PHP_SAPI !== 'cli') { + fwrite(STDERR, "This is a command line script." . PHP_EOL); exit(1); } - date_default_timezone_set(self::CONFIG['timezone']); - + ini_set('display_errors', '0'); + self::get_config(); self::parse_and_write_logs(); self::log_retention_and_deletion(); exit(0); } - /** - * Process method + /** + * Read JSON config files to build self::$config array. * + * This function assumes that the script is running from submitty/sbin. + * + * @static * @access private + */ + private static function get_config() { + + self::$config['mode'] = cli_args::parse_args(); + //'prod' reads yesterday's CSV. 'dev' read's most current CSV. + self::$config['psql_log_time_offset'] = (self::$config['mode'] === 'prod' ? -86400 : 0); + + if (file_exists("../config/submitty.json")) { + $json = file_get_contents("../config/submitty.json"); + $json = json_decode($json, true); + } else { + // Error logging not configured yet. + fwrite(STDERR, print_r(error_get_last(), true)); + exit(1); + } + + date_default_timezone_set($json['timezone']); + self::$config['postgresql_logfile_path'] = $json['site_log_path'] . self::POSTGRESQL_LOGDIR; + self::$config['pfn_logfile_path'] = $json['site_log_path'] . self::PREFERRED_NAMES_LOGDIR; + + if (file_exists("../config/preferred_name_logging.json")) { + $json = file_get_contents("../config/preferred_name_logging.json"); + $json = json_decode($json, true); + } else { + $json = array(); + } + + if (array_key_exists('log_emails', $json)) { + if (is_array($json['log_emails'])) { + self::$config['log_emails'] = $json['log_emails']; + } else { + self::$config['log_emails'] = array($json['log_emails']); + } + } else { + self::$config['log_emails'] = null; + } + + if (array_key_exists('log_file_retention', $json)) { + self::$config['log_file_retention'] = (int)$json['log_file_retention']; + } else { + self::$config['log_file_retention'] = 7; + } + } + + /** + * Scrape psql log and write preferred_names log + * * @static + * @access private */ private static function parse_and_write_logs() { - //Check to make sure Submitty log directory path exists. Create it if needed. - if (file_exists(self::CONFIG['submitty_logfile_path']) === false) { - if (mkdir(self::CONFIG['submitty_logfile_path'], 0700) === false) { - self::log("Submitty log folder needed, mkdir failed."); - exit(1); - } + //Check to make sure Submitty log directory path exists. + if (file_exists(self::$config['pfn_logfile_path']) === false) { + self::log("Submitty log folder missing."); + exit(1); } //Prepare submitty preferred name change log file for today. - $submitty_logfile = sprintf("%s%s_%s.log", self::CONFIG['submitty_logfile_path'], self::CONFIG['submitty_logfile'], date("Y-m-d")); - $submitty_fh = fopen($submitty_logfile, 'w'); - if ($submitty_fh === false) { + $pfn_log_file = sprintf("%s%s_%s.log", self::$config['pfn_logfile_path'], self::PREFERRED_NAMES_LOGFILE, date("Y-m-d")); + $pfn_fh = fopen($pfn_log_file, 'w'); + if ($pfn_fh === false) { self::log("Cannot create Submitty logfile."); exit(1); } else { - fwrite($submitty_fh, "Log opened." . PHP_EOL); + fwrite($pfn_fh, "Log opened." . PHP_EOL); } //There can be multiple psql log files that need to be read. - //But we want the ones dated one day prior (hence subtract 86400 seconds which is one day) - $preg_str = sprintf("~^%s\-%s_\d{6}\.csv$~", self::CONFIG['postgresql_logfile'], preg_quote(date("Y-m-d", time() - 86400))); - $logfiles = preg_grep($preg_str, scandir(self::CONFIG['postgresql_logfile_path'])); + $preg_str = sprintf("~^%s_%sT\d{6}\.csv$~", self::POSTGRESQL_LOGFILE, preg_quote(date("Y-m-d", time() + self::$config['psql_log_time_offset']))); + $logfiles = preg_grep($preg_str, scandir(self::$config['postgresql_logfile_path'])); foreach ($logfiles as $logfile) { - $psql_fh = fopen(self::CONFIG['postgresql_logfile_path'] . $logfile, 'r'); + $psql_fh = fopen(self::$config['postgresql_logfile_path'] . $logfile, 'r'); if ($psql_fh === false) { self::log("Cannot open {$logfile}."); continue; @@ -103,11 +146,11 @@ private static function parse_and_write_logs() { while($psql_row !== false) { //Validation. Any case is true, validation fails and row is ignored. switch(true) { - case count($psql_row) !== self::CONSTANTS['postgresql_log_row_count']: - self::log(sprintf("NOTICE: PSQL log row %d had %d columns. %d columns expected. Row ignored.%s", $psql_row_num, count($psql_row), self::CONFIG['postgresql_log_row_count'])); - case $psql_row[self::CONSTANTS['psql_validation_update']] !== "UPDATE": - case $psql_row[self::CONSTANTS['psql_validation_log']] !== "LOG": - case $psql_row[self::CONSTANTS['psql_validation_pfn']] !== "PREFERRED_NAME DATA UPDATE": + case count($psql_row) !== self::POSTGRESQL_LOG_ROW_COUNT; + self::log(sprintf("NOTICE: PSQL log row %d had %d columns. %d columns expected. Row ignored.", $psql_row_num, count($psql_row), self::$config['postgresql_log_row_count'])); + case $psql_row[self::PSQL_VALIDATION_UPDATE] !== "UPDATE": + case $psql_row[self::PSQL_VALIDATION_LOG] !== "LOG": + case $psql_row[self::PSQL_VALIDATION_PFN] !== "PREFERRED_NAME DATA UPDATE": $psql_row = fgetcsv($psql_fh); $psql_row_num++; continue 2; @@ -118,32 +161,28 @@ private static function parse_and_write_logs() { $psql_row = array_map('trim', $psql_row); //Get date token. - $date = $psql_row[self::CONSTANTS['psql_data_date']]; + $date = $psql_row[self::PSQL_DATA_DATE]; //Get "AUTH" token (who logged the change). $key = array(); - if (preg_match("~/\* AUTH: \"[\w\-]+\" \*/~", $psql_row[19]) === 1) { - $key['start'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], "/* AUTH: ") + 3; - $key['end'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], " */"); - $auth = " | " . substr($psql_row[self::CONSTANTS['psql_data_auth']], $key['start'], $key['end'] - $key['start']); + if (preg_match("~/\* AUTH: \"[\w\-]+\" \*/~", $psql_row[self::PSQL_DATA_AUTH]) === 1) { + $key['start'] = strpos($psql_row[self::PSQL_DATA_AUTH], "/* AUTH: ") + 3; + $key['end'] = strpos($psql_row[self::PSQL_DATA_AUTH], " */"); + $auth = " | " . substr($psql_row[self::PSQL_DATA_AUTH], $key['start'], $key['end'] - $key['start']); } else { - $auth = " | AUTH NOT LOGGED "; - //Anything sent to STDERR gets emailed by cron. - fprintf(STDERR, "WARNING: AUTH NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true)); + $auth = " | AUTH NOT LOGGED"; } //Get user_id and preferred name change tokens. $preferred_name = array(); - $preferred_names_data = explode(" ", $psql_row[self::CONSTANTS['psql_data_pfn']]); + $preferred_names_data = explode(" ", $psql_row[self::PSQL_DATA_PFN]); //user_id token $key = array_search("USER_ID:", $preferred_names_data); if ($key !== false) { - $user_id = " | USER_ID: {$preferred_names_data[$key+1]} "; + $user_id = " | USER_ID: {$preferred_names_data[$key+1]}"; } else { - $user_id = " | USER_ID NOT LOGGED "; - //Anything sent to STDERR gets emailed by cron. - fprintf(STDERR, "WARNING: USER ID NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true)); + $user_id = " | USER_ID NOT LOGGED"; } $key = array_search("PREFERRED_FIRSTNAME", $preferred_names_data); @@ -161,21 +200,21 @@ private static function parse_and_write_logs() { // It is possible that no Preferred Lastname was logged, in which we can ignore an move on. //Build preferred name change log entry. - $submitty_log = $date . $auth . $user_id; + $pfn_log_row = $date . $auth . $user_id; if (isset($preferred_name['first'])) { - $submitty_log .= " | PREFERRED_FIRSTNAME OLD: {$preferred_name['first']['old']} NEW: {$preferred_name['first']['new']}"; + $pfn_log_row .= " | PREFERRED_FIRSTNAME OLD: {$preferred_name['first']['old']} NEW: {$preferred_name['first']['new']}"; } else { - $submitty_log .= " | PREFERRED_FIRSTNAME UNCHANGED"; + $pfn_log_row .= " | PREFERRED_FIRSTNAME UNCHANGED"; } if (isset($preferred_name['last'])) { - $submitty_log .= " | PREFERRED_LASTNAME OLD: {$preferred_name['last']['old']} NEW: {$preferred_name['last']['new']}"; + $pfn_log_row .= " | PREFERRED_LASTNAME OLD: {$preferred_name['last']['old']} NEW: {$preferred_name['last']['new']}"; } else { - $submitty_log .= " | PREFERRED_LASTNAME UNCHANGED"; + $pfn_log_row .= " | PREFERRED_LASTNAME UNCHANGED"; } //Write log entry and go to next row. - fwrite($submitty_fh, $submitty_log . PHP_EOL); + fwrite($pfn_fh, $pfn_log_row . PHP_EOL); $psql_row = fgetcsv($psql_fh); $psql_row_num++; } @@ -183,47 +222,143 @@ private static function parse_and_write_logs() { fclose($psql_fh); } - fwrite($submitty_fh, "Log closed." . PHP_EOL); - fclose($submitty_fh); + fwrite($pfn_fh, "Log closed." . PHP_EOL); + fclose($pfn_fh); } /** - * Automatically remove old logs + * Automatically remove expired postgresql and preferred name change logs. + * + * Note that there is 86400 seconds in a day. * - * @access private * @static + * @access private */ private static function log_retention_and_deletion() { - $preg_str = sprintf("~^%s_%s.log$~", self::CONFIG['submitty_logfile'], preg_quote(date("m-d-Y"))); - $logfiles = preg_grep($preg_str, scandir(self::CONFIG['submitty_logfile_path'])); - $expiration_epoch = (int)(strtotime(date('Y-m-d')) / 86400) - self::CONFIG['submitty_log_retention']; - - foreach($logfiles as $logfile) { - $datestamp = substr($logfile, strpos($logfile, "_") + 1, 10); - $datestamp_epoch = (int)(strtotime($datestamp) / 86400); - if ($datestamp_epoch < $expiration_epoch) { - if (unlink(self::CONFIG['submitty_logfile_path'] . $logfile) === false) { - self::log("Could not delete old logfile: {$logfile}"); + $remove_logfiles = function($path, $logfiles, $expiration_epoch) { + foreach($logfiles as $logfile) { + $datestamp = substr($logfile, strpos($logfile, "_") + 1, 10); + $datestamp_epoch = intdiv(strtotime($datestamp), 86400); + if ($datestamp_epoch < $expiration_epoch) { + if (unlink($path . $logfile) === false) { + self::log("Unable to delete expired log {$logfile}"); + } } } - } + }; + + // Remove expired postgresql logs + $regex_pattern = sprintf("~^%s_\d{4}\-\d{2}\-\d{2}\-\d{6}\.csv|log$~", self::POSTGRESQL_LOGFILE); + $logfiles = preg_grep($regex_pattern, scandir(self::$config['postgresql_logfile_path'])); + $expiration_epoch = intdiv(time(), 86400) - 2; + $remove_logfiles(self::$config['postgresql_logfile_path'], $logfiles, $expiration_epoch); + + // Remove expired preferred name change logs + $regex_pattern = sprintf("~^%s_\d{4}\-\d{2}\-\d{2}\.log$~", self::PREFERRED_NAMES_LOGFILE); + $logfiles = preg_grep($regex_pattern, scandir(self::$config['pfn_logfile_path'])); + $expiration_epoch = intdiv(time(), 86400) - self::$config['log_file_retention']; + $remove_logfiles(self::$config['pfn_logfile_path'], $logfiles, $expiration_epoch); } /** - * Log messages to error log and STDERR. + * Log messages to error log. Also log to STDERR in 'dev' mode. * - * @access private * @static + * @access private */ private static function log(string $msg) { - $datestamp = date("m-d-Y"); - error_log(sprintf("%s %s", $datestamp, $msg), 0); - fprintf(STDERR, "%s%s", $msg, PHP_EOL); + $msg = sprintf("%s %s%sDetails: %s%s", date("m-d-Y H:i:s"), $msg, PHP_EOL, print_r(error_get_last(), true), PHP_EOL); + error_log($msg, 3, self::$config['pfn_logfile_path'] . self::ERROR_LOGFILE); + + if (self::$config['mode'] === 'dev') { + fwrite(STDERR, $msg); + } + + if (!is_null(self::$config['log_emails'])) { + $send_msg = "Error log from Submitty preferred name logging." . PHP_EOL . $msg; + $send_msg = wordwrap($send_msg, 70); + foreach(self::$config['log_emails'] as $email) { + error_log($send_msg, 1, $email); + } + } + } +} //END class main + +/** Class to parse command line arguments */ +class cli_args { + + /** @staticvar string usage help message */ + private static $help_usage = << diff --git a/preferred_name_logging/readme.md b/preferred_name_logging/readme.md index 9e0f16f..14eb1b1 100644 --- a/preferred_name_logging/readme.md +++ b/preferred_name_logging/readme.md @@ -1,96 +1,87 @@ # Preferred Name Logging -This tool will help track when user preferred names are changed. It attempts to -log who was authenticated for the change, and what change occurred. +This script will help track when user preferred names are changed. It attempts +to log who was authenticated for the change, and what change occurred. It works by first having Postgresql log the required information as the data is updated in Submitty's databases. Then the sysadmin tool in this folder will scrape the Postgresql logfile and record only those entries that showed preferred name change. +This is setup and configured by Submitty during system installation and should +automatically operate daily at 2:05AM. + ## FERPA Data processed and logged by this tool may be protected by [FERPA (20 U.S.C. ยง 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html). Please consult and abide by your institute's data protection policies. -## Postgresql - -Submitty's website code and database schema will instruct Postgresql to log any -`UPDATE` query involving any user's preferred first or last name changes. - -### Postgresql Configuration - -Postgresql needs to be configured to produce the required logs as the necessary -logging is not enabled by default. - -#### Config file - -- Config file is `Postgresql.conf` - - In Ubuntu 18.04, it is located in `/etc/postgresql/10/main/postgresql.conf` - -#### Config options - -You will find these options under "ERROR REPORTING AND LOGGING". Please enable -(remove the '#' symbol preceding the line) and set these values: - -- `log_destination = 'csvlog'` - - The sysadmin tool will scrape a CSV file. -- `logging_collector = on` - - Postgresql doesn't write logs without. this. -- `log_directory = '/var/log/postgresql'` - - You can use a different folder, if needed. -- `log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'` - - You can change the filename a little, but it **must** end with `-%Y-%m-%d_%H%M%S.log` -- `log_file_mode = 0600` - - Limits access to this logfile. -- `log_rotation_age = 2d` - - At least 2 days of logs will be needed since the sysadmin tool is intended - to run the following day. -- `log_rotation_size = 10MB` - - This doesn't need to be changed. Any additional logfiles created for a - single day will be picked up by the sysadmin tool. -- `log_min_messages = log` - - Preferred name changes are logged at the `log` level. You can set any - higher level of detail, but not lower than `log`. -- `log_min_duration_statement = 0` - - We want all log instances regardless of process time. -- `log_line_prefix = '%m [%p] %q%u@%d '` - - This can be changed so long as the very first detail is a timestamp. The - sysadmin tool expects to find a timestamp at the very first column. - -## Sysadmin Tool - -The sysadmin tool needs to be run on a machine with local file access to the -Postgresql log file. It is written in PHP. - -### Sysdamin Tool Config - -The configuration is defined as a class constant. Near the top of the code is a -small block starting as `private const CONFIG = array(`. Inside the block will -be a config element in single quotes, an arrow like `=>`, a value possibly -enclosed in double quotes, followed by a comma. (Do not lose the comma!) - -- `'timezone' => "America/New_York",` - - Set this to your local timezone. q.v. - [https://www.php.net/manual/en/timezones.php](https://www.php.net/manual/en/timezones.php) - for more information. -- `'postgresql_logfile_path' => "/var/log/postgresql/",` - - Set this to the same setting as `log_directory` in `postgresql.conf`. -- `'submitty_logfile_path' => "/var/log/submitty/",` - - Where the sysadmin tool will write the preferred name logfile. -- `'postgresql_logfile' => "postgresql",` - - The name of the logfile created by Postgresql. Do not include the time - stamp. This only needs to be changed when `log_filename` in - `postgresql.conf` is changed. -- `'submitty_logfile' => "submitty_preferred_names",` - - Name of the preferred name change logfile. You can leave this as is. -- `'submitty_log_retention' => 7` - - How many days of preferred name change logs to keep. - -### Running the Sysadmin Tool - -This tool is meant to be executed on the command line and can be scheduled as a -cron job. Errors will be outputted to `STDERR`, which in a crontab can either -be redirected to `/dev/null` or emailed to a sysadmin. Running as `root` is -required, and there are no command line arguments. +## Logs + +Submitty's installation scripts will configure postgresql to write its logs to +`/var/local/submitty/logs/psql/`, rotated on a daily basis. + +This script will scrape the previous day's Postgresql log for any logged +changes to any user's preferred names. It will then create a daily log of +*preferred name changes* within `/var/local/submitty/logs/preferred_names/`. + +This script will also remove postgresql logs older than 2 days as postgresql's +own log rotation will not selectively remove outdated logs. + +## postgresql.conf + +Postgresql's original configuration is first copied to `postgresql.conf.backup`. +The following changes will be applied: +``` +log_destination = 'csvlog' +logging_collector = on +log_directory = '/var/log/postgresql' +log_filename = 'postgresql_%Y-%m-%dT%H%M%S.log' +log_file_mode = 0640 +log_rotation_age = 1d +log_rotation_size = 0 +log_min_messages = warning +log_min_duration_statement = -1 +log_statement = 'ddl' +log_error_verbosity = default +``` + +## preferred_names.json + +A sysadmin may optionally create a json file to configure a couple of options +for preferred name logging. If this json is not created, the script will +assume default settings, instead. + +To set these options, first create an empty text file in +`usr/local/submitty/config/preferred_names.json` + +Next, following the json format, you may set the following options (anything +else in the file will be ignored). + +* `log_emails` + This is either a singular email address or a list of email addresses. The + script will send error messages to the email address(es) listed. + + If this is a list, key values are ignored. But you could set key values to + document who owns a particular email address. + + Set to `null` to turn this off. Default setting is `null`. + +* `log_file_retention` + A whole number representing how many days of preferred name change logs to + keep. *This does not affect postgresql's logs.* Default setting is 7. + +## Example json: +```json +{ + "log_emails": + { + "Ada_Lovelace": "alovelace@submitty.com", + "Charles_Babbage": "cbabbage@submitty.com", + "Sysadmin_Mailing_List": "sysadmins@lists.submitty.com" + }, + + "log_file_retention": 30 +} +```