|
|
@ -1,17 +1,17 @@ |
|
|
|
## Imports |
|
|
|
# std |
|
|
|
import os |
|
|
|
import os |
|
|
|
import sys |
|
|
|
import sys |
|
|
|
import git |
|
|
|
|
|
|
|
import io |
|
|
|
import io |
|
|
|
from pathlib import Path |
|
|
|
from pathlib import Path |
|
|
|
import json |
|
|
|
import json |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# non-std |
|
|
|
|
|
|
|
import git |
|
|
|
import pandas as pd |
|
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
## Module Constants |
|
|
|
|
|
|
|
# strftime pattern used to serialise commit timestamps: ISO-8601 date and
# time followed by the numeric UTC offset (%z).
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"

# SHA-1 of git's empty tree object.
# NOTE(review): presumably used as the diff base for a root commit, which
# has no parent -- confirm against the body of versions().
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def versions(path, branch='master'): |
|
|
|
def versions(path, branch='master'): |
|
|
|
""" |
|
|
|
""" |
|
|
|
This function returns a generator which iterates through all commits of |
|
|
|
This function returns a generator which iterates through all commits of |
|
|
@ -70,55 +70,23 @@ def versions(path, branch='master'): |
|
|
|
'commit': commit.hexsha, |
|
|
|
'commit': commit.hexsha, |
|
|
|
'author': commit.author.email, |
|
|
|
'author': commit.author.email, |
|
|
|
'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT), |
|
|
|
'time': commit.authored_datetime.strftime(DATE_TIME_FORMAT), |
|
|
|
#'size': diff_size(diff), |
|
|
|
|
|
|
|
#'type': diff_type(diff), |
|
|
|
|
|
|
|
'crate_name': d['name'], |
|
|
|
'crate_name': d['name'], |
|
|
|
'version': d['vers'], |
|
|
|
'version': d['vers'], |
|
|
|
#'json': lastline, |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
# Update the stats with the additional information |
|
|
|
|
|
|
|
# stats.update(row) |
|
|
|
|
|
|
|
# yield stats |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
yield row |
|
|
|
yield row |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#def diff_size(diff): |
|
|
|
|
|
|
|
# """ |
|
|
|
|
|
|
|
# Computes the size of the diff by comparing the size of the blobs. |
|
|
|
|
|
|
|
# """ |
|
|
|
|
|
|
|
# if diff.b_blob is None and diff.deleted_file: |
|
|
|
|
|
|
|
# # This is a deletion, so return negative the size of the original. |
|
|
|
|
|
|
|
# return diff.a_blob.size * -1 |
|
|
|
|
|
|
|
# |
|
|
|
|
|
|
|
# if diff.a_blob is None and diff.new_file: |
|
|
|
|
|
|
|
# # This is a new file, so return the size of the new value. |
|
|
|
|
|
|
|
# return diff.b_blob.size |
|
|
|
|
|
|
|
# |
|
|
|
|
|
|
|
# # Otherwise just return the size a-b |
|
|
|
|
|
|
|
# return diff.a_blob.size - diff.b_blob.size |
|
|
|
|
|
|
|
# |
|
|
|
|
|
|
|
# |
|
|
|
|
|
|
|
#def diff_type(diff): |
|
|
|
|
|
|
|
# """ |
|
|
|
|
|
|
|
# Determines the type of the diff by looking at the diff flags. |
|
|
|
|
|
|
|
# """ |
|
|
|
|
|
|
|
# if diff.renamed: return 'R' |
|
|
|
|
|
|
|
# if diff.deleted_file: return 'D' |
|
|
|
|
|
|
|
# if diff.new_file: return 'A' |
|
|
|
|
|
|
|
# return 'M' |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(path):
    """
    Print the publish history produced by ``versions(path)`` as CSV on stdout.

    The rows yielded by ``versions`` are loaded into a DataFrame, the ``time``
    column is parsed into UTC timestamps, and the data is reduced to one
    entry per ``(crate_name, version)`` pair, keeping the latest one.

    Nothing is printed when ``versions`` yields no rows.

    :param path: passed through unchanged to ``versions``.
    """
    df = pd.DataFrame(versions(path))
    if df.empty:
        # A DataFrame built from zero rows has no columns, so the 'time'
        # lookup below would raise KeyError; there is nothing to report.
        return

    df['time'] = pd.to_datetime(df['time'], utc=True)

    # Sort chronologically first so that .last() picks the most recent
    # entry of every (crate_name, version) group.
    df = (
        df.sort_values(by='time')
        .groupby(['crate_name', 'version'])
        .last()
        .reset_index()
    )

    # to_csv() without a target returns the CSV text. print() then appends
    # one more newline, exactly as printing a StringIO buffer's value did.
    print(df.to_csv(index=False))
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Show the usage text when PATH is missing or help is requested, rather
    # than failing with IndexError on sys.argv[1].
    if len(sys.argv) == 1 or any(a in ('-h', '--help') for a in sys.argv[1:]):
        print("USAGE:\n python3 get-publish-history.py PATH\n", file=sys.stderr)
    else:
        # The first positional argument is the PATH handed to main().
        main(sys.argv[1])
|
|
|
|
|
|
|
|
|
|
|