-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2689e99
commit dc180a6
Showing
3 changed files
with
71 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from datasets import load_dataset | ||
|
||
|
||
|
||
# Report character-length statistics (avg/min/max) of docstrings and canonical
# solutions for every language split of the HumanEvalPack benchmark.
for lang in ['python', 'js', 'cpp', 'java', 'go', 'rust']:
    print(f'Language: {lang}')
    ds = load_dataset('bigcode/humanevalpack', lang, split="test")
    # Build each length list once instead of re-computing it for every statistic.
    docstring_lengths = [len(d) for d in ds["docstring"]]
    solution_lengths = [len(d) for d in ds["canonical_solution"]]
    print(f'Average docstring length: {sum(docstring_lengths) / len(docstring_lengths)}')
    print(f'Min docstring length: {min(docstring_lengths)}')
    print(f'Max docstring length: {max(docstring_lengths)}')
    print(f'Average solution length: {sum(solution_lengths) / len(solution_lengths)}')
    print(f'Min solution length: {min(solution_lengths)}')
    print(f'Max solution length: {max(solution_lengths)}')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import json | ||
import nlgeval | ||
from tqdm import tqdm | ||
from nlgeval import NLGEval | ||
|
||
def get_ref(file_path):
    """Read a JSONL file and return the 'docstring' field of every record, in order."""
    with open(file_path) as handle:
        return [json.loads(record)['docstring'] for record in handle]
|
||
def get_hyp(file_path):
    """Load and return the parsed contents of a JSON file of generated hypotheses."""
    with open(file_path) as handle:
        return json.load(handle)
|
||
|
||
# Score generated explanations against reference docstrings with NLGEval.
# Heavy embedding-based scorers and SPICE/CIDEr/ROUGE_L are disabled.
n = NLGEval(no_skipthoughts=True, no_glove=True, metrics_to_omit=['SPICE', 'CIDEr', 'ROUGE_L'])

for lang in ['cpp', 'java', 'go', 'js', 'python', 'rust']:
    metrics_dicts = []
    print(f'Language: {lang}')
    ref = get_ref(f'data/{lang}/data/humanevalpack.jsonl')
    hyp = get_hyp(f'octocoder/humanevalexplain/generations_humanevalexplaindescribe{lang}_starcoderguanacocommits.json')
    for i in tqdm(range(len(hyp))):
        # Keep, per metric, the best score over all candidate generations
        # for problem i.
        best = {}
        for candidate in hyp[i]:
            scores = n.compute_individual_metrics(ref[i], candidate)
            for metric, value in scores.items():
                best[metric] = max(best.get(metric, 0), value)
        metrics_dicts.append(best)
    with open(f'octocoder/humanevalexplain/metrics_humanevalexplaindescribe{lang}_starcoderguanacocommits.json', 'w') as f:
        json.dump(metrics_dicts, f, indent=4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import json | ||
|
||
# Print per-language averages of BLEU-1..4 and METEOR (as percentages)
# from the previously-computed per-problem metric files.
for lang in ['cpp', 'java', 'go', 'js', 'python', 'rust']:
    print(f'Language: {lang}')
    path = f'evaluation/octocoder/humanevalexplain/metrics_humanevalexplaindescribe{lang}_starcoderguanacocommits.json'
    with open(path, 'r') as f:
        data = json.load(f)

    # Each metric is rescaled to a percentage and averaged over all problems.
    metric_keys = [
        ('BLEU-1', 'Bleu_1'),
        ('BLEU-2', 'Bleu_2'),
        ('BLEU-3', 'Bleu_3'),
        ('BLEU-4', 'Bleu_4'),
        ('METEOR', 'METEOR'),
    ]
    for label, key in metric_keys:
        scores = [d[key] * 100 for d in data]
        print(f'{label}: {sum(scores) / len(scores)}')