Browse Source

clean up code

feat/mass-publish-tool
Jonathan Strong 1 year ago
parent
commit
b717fc952b
  1. 50
      script/get-publish-history.py

50
script/get-publish-history.py

@ -1,17 +1,17 @@
## Imports # std
import os import os
import sys import sys
import git
import io import io
from pathlib import Path from pathlib import Path
import json import json
# non-std
import git
import pandas as pd import pandas as pd
## Module Constants
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z" DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
def versions(path, branch='master'): def versions(path, branch='master'):
""" """
This function returns a generator which iterates through all commits of This function returns a generator which iterates through all commits of
@ -70,55 +70,23 @@ def versions(path, branch='master'):
'commit': commit.hexsha, 'commit': commit.hexsha,
'author': commit.author.email, 'author': commit.author.email,
'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT), 'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT),
#'size': diff_size(diff),
#'type': diff_type(diff),
'crate_name': d['name'], 'crate_name': d['name'],
'version': d['vers'], 'version': d['vers'],
#'json': lastline,
} }
# Update the stats with the additional information
# stats.update(row)
# yield stats
yield row yield row
#def diff_size(diff):
# """
# Computes the size of the diff by comparing the size of the blobs.
# """
# if diff.b_blob is None and diff.deleted_file:
# # This is a deletion, so return negative the size of the original.
# return diff.a_blob.size * -1
#
# if diff.a_blob is None and diff.new_file:
# # This is a new file, so return the size of the new value.
# return diff.b_blob.size
#
# # Otherwise just return the size a-b
# return diff.a_blob.size - diff.b_blob.size
#
#
#def diff_type(diff):
# """
# Determines the type of the diff by looking at the diff flags.
# """
# if diff.renamed: return 'R'
# if diff.deleted_file: return 'D'
# if diff.new_file: return 'A'
# return 'M'
def main(path): def main(path):
df = pd.DataFrame(versions(path)) df = pd.DataFrame(versions(path))
df['time'] = pd.to_datetime(df['time'], utc=True) df['time'] = pd.to_datetime(df['time'], utc=True)
# df['unix_nanos'] = df['time'].astype('int')
df = df.sort_values(by='time').groupby(['crate_name', 'version']).last().reset_index() df = df.sort_values(by='time').groupby(['crate_name', 'version']).last().reset_index()
buf = io.StringIO() buf = io.StringIO()
df.to_csv(buf, index=False) df.to_csv(buf, index=False)
print(buf.getvalue()) print(buf.getvalue())
if __name__ == '__main__': if __name__ == '__main__':
path = sys.argv[1] if len(sys.argv) == 1 or any(a == '-h' or a == '--help' for a in sys.argv):
main(path) print("USAGE:\n python3 get-publish-history.py PATH\n", file=sys.stderr)
else:
path = sys.argv[1]
main(path)

Loading…
Cancel
Save