class DirectoryMtimes(object):
    """
    Track mtimes for paths.

    Every recorded mtime also bubbles up to all parent entries, so a
    directory entry always holds the newest mtime recorded for anything
    below it. The top-level directory can be addressed as '' or '.'.
    """

    def __init__(self):
        # Maps a tuple of path components to the newest mtime seen for it.
        # The empty tuple is the top-level directory.
        self.mtimes = {}

    @staticmethod
    def _key(path):
        """Return the dictionary key (tuple of components) for path."""
        normalized = os.path.normpath(path)
        # normpath() turns '' (and things like 'a/..') into '.'. Map that to
        # the empty tuple, which is where bubbling up from nested paths
        # ends, so that the top-level directory has exactly one entry.
        if normalized == os.curdir:
            return ()
        return tuple(normalized.split(os.sep))

    def set_path_mtime(self, path, mtime):
        """Set mtime of path. Also updates parent paths if mtime is newer."""
        key = self._key(path)

        # Walk from the path itself up to the top-level directory.
        while True:
            current_mtime = self.mtimes.get(key)
            if current_mtime is not None and current_mtime >= mtime:
                # Every update goes through this loop, so parents are always
                # at least as new as their children: nothing left to raise.
                break
            self.mtimes[key] = mtime

            if not key:
                break
            key = key[:-1]

    def get_path_mtime(self, path):
        """Get mtime recorded for a path, or None if nothing was recorded."""
        return self.mtimes.get(self._key(path))


def touch_dir(dirname, mtime, datestr, stats, git):
    """
    Touch a single directory of the work tree and update the statistics.

    dirname is joined onto git.workdir; an empty dirname is logged as './'.
    datestr is only used for the debug log line. On success
    stats['dirtouches'] is incremented; any exception is logged and counted
    in stats['direrrors'] instead of being propagated.

    Relies on the module-level names args, log and touch, which are defined
    outside this block (touch presumably applies mtime to the given path).
    """
    if args.debug:
        log.debug("%d\t%d\t-\t%s\t%s",
                  stats['loglines'], stats['commits'],
                  datestr, "{}/".format(dirname or '.'))
    try:
        touch(os.path.join(git.workdir, dirname), mtime)
        stats['dirtouches'] += 1
    except Exception as e:
        # Best effort: one failing directory must not abort the whole run
        log.error("ERROR: %s: %s", e, dirname)
        stats['direrrors'] += 1
stats['errors'] += 1 def do_dir(): - if args.debug: - log.debug("%d\t%d\t-\t%s\t%s", - stats['loglines'], stats['commits'], - datestr, "{}/".format(dirname or '.')) - try: - touch(os.path.join(git.workdir, dirname), mtime) - stats['dirtouches'] += 1 - except Exception as e: - log.error("ERROR: %s: %s", e, dirname) - stats['direrrors'] += 1 + touch_dir(dirname, mtime, datestr, stats, git) if file in filelist: stats['files'] -= 1 filelist.remove(file) do_file() + dir_times.set_path_mtime(file, mtime) if args.dirs and status in ('A', 'D'): dirname = os.path.dirname(file) if dirname in dirlist: dirlist.remove(dirname) do_dir() + dir_times.set_path_mtime(dirname, mtime) # All files done? if not stats['files']: @@ -516,7 +551,15 @@ def main(): filelist -= dirty # Build dir list to be processed - dirlist = set(os.path.dirname(_) for _ in filelist) if args.dirs else set() + # For every file name, also add _all_ parent directories to set, so we also consider directories + # with just subdirectories, but no files. + dirlist = set() + if args.dirs: + for fn in filelist: + current_dir = os.path.dirname(fn) + while current_dir != "": + dirlist.add(current_dir) + current_dir = os.path.dirname(current_dir) stats['totalfiles'] = stats['files'] = len(filelist) log.info("{0:,} files to be processed in work dir".format(stats['totalfiles'])) @@ -525,9 +568,11 @@ def main(): # Nothing to do. 
Exit silently and without errors, just like git does return + dir_times = DirectoryMtimes() + # Process the log until all files are 'touched' log.debug("Line #\tLog #\tF.Left\tModification Time\tFile Name") - parse_log(filelist, dirlist, stats, git, args.merge, args.pathspec) + parse_log(filelist, dirlist, dir_times, stats, git, args.merge, args.pathspec) # Missing files if filelist: @@ -538,13 +583,19 @@ def main(): missing = len(filterlist) log.info("{0:,} files not found in log, trying merge commits".format(missing)) for i in range(0, missing, STEPMISSING): - parse_log(filelist, dirlist, stats, git, + parse_log(filelist, dirlist, dir_times, stats, git, merge=True, filterlist=filterlist[i:i + STEPMISSING]) # Still missing some? for file in filelist: log.warning("WARNING: not found in the log: %s", file) + # Directories not appearing in log + for dir in dirlist: + dir_mtime = dir_times.get_path_mtime(dir) + if dir_mtime is not None: + touch_dir(dir, dir_mtime, isodate(dir_mtime), stats, git) + # Final statistics # Suggestion: use git-log --before=mtime to brag about skipped log entries def log_info(msg, *a, width=13):