|
|
|
# std
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import io
|
|
|
|
from pathlib import Path
|
|
|
|
import json
|
|
|
|
|
|
|
|
# non-std
|
|
|
|
import git
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
# strftime pattern used to serialise commit timestamps: date and time
# separated by "T", followed by the UTC offset (%z).
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
|
|
|
|
# SHA-1 of git's empty tree object; it can stand in as the diff base for a
# root commit, which has no parent to diff against.
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
|
|
|
|
|
|
|
|
def versions(path, branch='master'):
    """
    Walk every commit of ``branch`` in the git repository located at ``path``
    and yield one record per file touched by each commit.

    For every file listed in a commit's stats, the content of that file at
    the commit is read and its last line is parsed as JSON. A dict with the
    keys ``path``, ``commit``, ``author``, ``time``, ``crate_name`` and
    ``version`` is yielded for it.

    An entry is skipped, with a note on stderr, when:

    * its path is not exactly three components deep,
    * the file cannot be read at that commit (or is empty / not UTF-8),
    * the last line is not JSON, or lacks the ``name`` / ``vers`` keys.

    ``path`` is the filesystem path of the repository; creating the
    repository object raises an error if it isn't one. ``branch`` is the
    revision whose history is walked.
    """
    # Create the repository, raises an error if it isn't one.
    repo = git.Repo(path)

    # Iterate through every commit for the given branch in the repository
    for commit in repo.iter_commits(branch):
        # The stats on the commit is a summary of all the changes for this
        # commit. Only the changed paths are needed here, so iterate the keys
        # and ignore the per-file line counts.
        for objpath in commit.stats.files:
            p = Path(objpath)
            if len(p.parts) != 3:
                print(f'skipping path: wrong depth ({p.parts})', file=sys.stderr)
                continue

            # Deliberately broad: reading the blob is best-effort. An empty
            # file raises IndexError on [-1], undecodable bytes raise
            # UnicodeDecodeError, and the tree lookup itself may fail
            # (presumably when the commit deleted the path -- confirm).
            try:
                obj = commit.tree / objpath
                with io.BytesIO(obj.data_stream.read()) as f:
                    lastline = f.readlines()[-1].decode('utf-8')
            except Exception:
                print(f'failed to load file at commit {commit}', file=sys.stderr)
                continue

            # Extract the fields inside the same guard as the parse, so that
            # valid JSON of the wrong shape (not an object, or missing a key)
            # skips this entry instead of aborting the whole walk.
            try:
                d = json.loads(lastline.strip())
                crate_name = d['name']
                version = d['vers']
            except (ValueError, KeyError, TypeError) as e:
                print(f'failed to parse json at commit {commit}: {e}', file=sys.stderr)
                continue

            yield {
                'path': os.path.join(path, objpath),
                'commit': commit.hexsha,
                'author': commit.author.email,
                'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT),
                'crate_name': crate_name,
                'version': version,
            }
|
|
|
|
|
|
|
|
def main(path):
    """
    Print the publish history of the git repository at ``path`` as CSV.

    Collects the rows produced by ``versions(path)``, sorts them by their
    ``time`` column (parsed as UTC timestamps) and, for every
    (``crate_name``, ``version``) pair, keeps the last value of each
    remaining column. The resulting table is written to stdout with a
    header row and without the index.

    If no rows are produced, a note is written to stderr and nothing is
    printed on stdout.
    """
    rows = list(versions(path))
    if not rows:
        # A DataFrame built from zero rows has no columns at all, so the
        # df['time'] access below would die with an unhelpful KeyError.
        print(f'no versions found in {path}', file=sys.stderr)
        return

    df = pd.DataFrame(rows)
    df['time'] = pd.to_datetime(df['time'], utc=True)

    # Sorting first makes .last() pick the most recent entry of each group.
    df = (
        df.sort_values(by='time')
        .groupby(['crate_name', 'version'])
        .last()
        .reset_index()
    )

    # to_csv() without a target returns the CSV text; print() appends one
    # more newline, exactly as the previous StringIO round-trip did.
    print(df.to_csv(index=False))
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Usage is shown when no argument was supplied or when a help flag
    # appears anywhere on the command line; otherwise the first argument is
    # taken as the repository path.
    help_requested = any(arg in ('-h', '--help') for arg in sys.argv)
    if len(sys.argv) != 1 and not help_requested:
        path = sys.argv[1]
        main(path)
    else:
        print("USAGE:\n python3 get-publish-history.py PATH\n", file=sys.stderr)
|
|
|
|
|