From b717fc952b8a66da4f450ea0a88216f7bb2c86ec Mon Sep 17 00:00:00 2001 From: Jonathan Strong Date: Thu, 9 Nov 2023 13:00:43 -0500 Subject: [PATCH] clean up code --- script/get-publish-history.py | 50 +++++++---------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) diff --git a/script/get-publish-history.py b/script/get-publish-history.py index 4bc3797..206c220 100644 --- a/script/get-publish-history.py +++ b/script/get-publish-history.py @@ -1,17 +1,17 @@ -## Imports +# std import os import sys -import git import io from pathlib import Path import json + +# non-std +import git import pandas as pd -## Module Constants DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z" EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" - def versions(path, branch='master'): """ This function returns a generator which iterates through all commits of @@ -70,55 +70,23 @@ def versions(path, branch='master'): 'commit': commit.hexsha, 'author': commit.author.email, 'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT), - #'size': diff_size(diff), - #'type': diff_type(diff), 'crate_name': d['name'], 'version': d['vers'], - #'json': lastline, } - - # Update the stats with the additional information - # stats.update(row) - # yield stats - yield row - -#def diff_size(diff): -# """ -# Computes the size of the diff by comparing the size of the blobs. -# """ -# if diff.b_blob is None and diff.deleted_file: -# # This is a deletion, so return negative the size of the original. -# return diff.a_blob.size * -1 -# -# if diff.a_blob is None and diff.new_file: -# # This is a new file, so return the size of the new value. -# return diff.b_blob.size -# -# # Otherwise just return the size a-b -# return diff.a_blob.size - diff.b_blob.size -# -# -#def diff_type(diff): -# """ -# Determines the type of the diff by looking at the diff flags. -# """ -# if diff.renamed: return 'R' -# if diff.deleted_file: return 'D' -# if diff.new_file: return 'A' -# return 'M' - def main(path): df = pd.DataFrame(versions(path)) df['time'] = pd.to_datetime(df['time'], utc=True) - # df['unix_nanos'] = df['time'].astype('int') df = df.sort_values(by='time').groupby(['crate_name', 'version']).last().reset_index() buf = io.StringIO() df.to_csv(buf, index=False) print(buf.getvalue()) if __name__ == '__main__': - path = sys.argv[1] - main(path) + if len(sys.argv) == 1 or any(a == '-h' or a == '--help' for a in sys.argv): + print("USAGE:\n python3 get-publish-history.py PATH\n", file=sys.stderr) + else: + path = sys.argv[1] + main(path)