Skip to content

Commit

Permalink
Add stats
Browse files Browse the repository at this point in the history
  • Loading branch information
Muennighoff committed Nov 13, 2023
1 parent 2689e99 commit dc180a6
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 0 deletions.
19 changes: 19 additions & 0 deletions evaluation/other/humanevalpack_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from datasets import load_dataset



for lang in ['python', 'js', 'cpp', 'java', 'go', 'rust']:
    print(f'Language: {lang}')
    ds = load_dataset('bigcode/humanevalpack', lang, split="test")
    # Compute the character lengths once per field instead of rebuilding the
    # same list comprehension for every statistic (the original built each
    # list three times).
    doc_lens = [len(d) for d in ds["docstring"]]
    sol_lens = [len(s) for s in ds["canonical_solution"]]
    # Average docstring length
    print(f'Average docstring length: {sum(doc_lens) / len(doc_lens)}')
    # Min docstring length
    print(f'Min docstring length: {min(doc_lens)}')
    # Max docstring length
    print(f'Max docstring length: {max(doc_lens)}')
    # Average solution length
    print(f'Average solution length: {sum(sol_lens) / len(sol_lens)}')
    # Min solution length
    print(f'Min solution length: {min(sol_lens)}')
    # Max solution length
    print(f'Max solution length: {max(sol_lens)}')
33 changes: 33 additions & 0 deletions evaluation/other/nlg_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import json
import nlgeval
from tqdm import tqdm
from nlgeval import NLGEval

def get_ref(file_path):
    """Read a JSON-lines file and return the 'docstring' field of each row.

    Each line of *file_path* must be a standalone JSON object containing a
    'docstring' key; the values are returned in file order.
    """
    # JSON is UTF-8 by specification — pin the encoding so the platform
    # default (e.g. cp1252 on Windows) cannot mis-decode the file.
    with open(file_path, encoding='utf-8') as f:
        return [json.loads(line)['docstring'] for line in f]

def get_hyp(file_path):
    """Load and return the model's hypothesis generations from a JSON file."""
    with open(file_path) as handle:
        return json.load(handle)


# Skip-thought and GloVe embedding metrics are expensive; SPICE, CIDEr and
# ROUGE_L are dropped as well, leaving BLEU-1..4 and METEOR.
scorer = NLGEval(no_skipthoughts=True, no_glove=True, metrics_to_omit=['SPICE', 'CIDEr', 'ROUGE_L'])

for lang in ['cpp', 'java', 'go', 'js', 'python', 'rust']:
    print(f'Language: {lang}')
    ref = get_ref(f'data/{lang}/data/humanevalpack.jsonl')
    hyp = get_hyp(f'octocoder/humanevalexplain/generations_humanevalexplaindescribe{lang}_starcoderguanacocommits.json')
    metrics_dicts = []
    # For every problem, keep the best score per metric over all candidate
    # generations for that problem.
    for idx, candidates in enumerate(tqdm(hyp)):
        best = {}
        for candidate in candidates:
            scores = scorer.compute_individual_metrics(ref[idx], candidate)
            for metric, value in scores.items():
                best[metric] = max(best.get(metric, 0), value)
        metrics_dicts.append(best)
    with open(f'octocoder/humanevalexplain/metrics_humanevalexplaindescribe{lang}_starcoderguanacocommits.json', 'w') as f:
        json.dump(metrics_dicts, f, indent=4)
19 changes: 19 additions & 0 deletions evaluation/other/nlg_eval_avg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json

for lang in ['cpp', 'java', 'go', 'js', 'python', 'rust']:
    print(f'Language: {lang}')
    with open(f'evaluation/octocoder/humanevalexplain/metrics_humanevalexplaindescribe{lang}_starcoderguanacocommits.json', 'r') as f:
        data = json.load(f)

    # Report the corpus-level mean of each metric, scaled to percentages.
    # (label, key) pairs map the printed name to the per-problem dict key.
    for label, key in [('BLEU-1', 'Bleu_1'),
                       ('BLEU-2', 'Bleu_2'),
                       ('BLEU-3', 'Bleu_3'),
                       ('BLEU-4', 'Bleu_4'),
                       ('METEOR', 'METEOR')]:
        scores = [entry[key] * 100 for entry in data]
        print(f'{label}: {sum(scores) / len(scores)}')

0 comments on commit dc180a6

Please sign in to comment.