Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding tools for making standard metadata #612

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions utilities/add_origins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from metacat.webapi import MetaCatClient
from argparse import ArgumentParser as ap
import json

def add_args(parser):
    """Register the origin-related options on ``parser``.

    Adds ``--past_fcls`` and ``--past_apps``. Each accepts one or more string
    values and is parsed into a list; an option that is not given stays
    ``None``.
    """
    for flag in ('--past_fcls', '--past_apps'):
        parser.add_argument(flag, type=str, nargs='+')

def add_origins(args, version):
    """Build the ``origin.applications.*`` metadata for earlier processing steps.

    Args:
        args: Parsed arguments providing ``past_apps`` and ``past_fcls``,
            parallel lists where ``past_fcls[i]`` is the config file paired
            with ``past_apps[i]``.
        version: Value recorded as the version of every past application.

    Returns:
        dict with the keys ``origin.applications.config_files`` (app -> fcl),
        ``origin.applications.versions`` (app -> ``version``) and
        ``origin.applications.names`` (the ``past_apps`` list itself).

    Raises:
        ValueError: If either list is missing or their lengths differ.
    """
    apps = args.past_apps
    fcls = args.past_fcls

    # Test both lists for None before calling len(): argparse leaves an
    # omitted option as None, and len(None) would surface as a TypeError
    # instead of the intended ValueError.
    if apps is None or fcls is None or len(apps) != len(fcls):
        raise ValueError('Need to provide same number of past apps and fcls')

    return {
        'origin.applications.config_files': dict(zip(apps, fcls)),
        'origin.applications.versions': dict.fromkeys(apps, version),
        'origin.applications.names': apps,
    }

if __name__ == '__main__':
    # Standalone use: write the origin metadata for the given past apps/fcls
    # to a json file.
    parser = ap()
    parser.add_argument('--json', '-j', type=str, required=True,
                        help='Output json file')
    # The option helper defined in this module is add_args; calling the
    # non-existent add_origin_args raised NameError before any parsing.
    add_args(parser)
    args = parser.parse_args()

    # NOTE(review): the application version is hard-coded to 'v1' here.
    output = add_origins(args, 'v1')

    # Serialize first so a serialization failure cannot leave behind a
    # truncated output file.
    json_object = json.dumps(output, indent=2)

    # Write to the file named by --json.
    with open(args.json, "w") as outfile:
        outfile.write(json_object)

40 changes: 40 additions & 0 deletions utilities/get_events_for_md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import ROOT as RT
import json
from argparse import ArgumentParser as ap

def get_events(filename):
    """Collect the event number of every entry in ``filename``.

    The file is opened through ``RT.gallery.Event`` and each entry is visited
    in order with ``goToEntry``; the number recorded for an entry is
    ``eventAuxiliary().id().event()``.

    Returns:
        list of event numbers, in entry order.
    """
    # gallery.Event is given a vector of strings; build one that holds only
    # this file name.
    file_names = RT.vector(RT.string)(1, filename)
    gallery_event = RT.gallery.Event(file_names)

    numbers = []
    for entry in range(gallery_event.numberOfEventsInFile()):
        gallery_event.goToEntry(entry)
        event_id = gallery_event.eventAuxiliary().id()
        numbers.append(event_id.event())
    return numbers

def place_events(events, md):
    """Record the event list and its summary fields in ``md`` (in place).

    Args:
        events: Event numbers, in the order they should be reported.
        md: Metadata dict to update; nothing is returned.

    ``core.first_event_number`` and ``core.last_event_number`` are the first
    and last entries of ``events`` as given (no sorting is applied). For an
    empty list those two keys are left out, because there is no event to
    report; indexing an empty list would otherwise raise IndexError.
    """
    n_events = len(events)
    md.update({
        'core.events': events,
        'core.event_count': n_events,
    })
    if n_events > 0:
        md.update({
            'core.first_event_number': events[0],
            'core.last_event_number': events[-1],
        })

if __name__ == '__main__':
    # Standalone use: read the event numbers from an input file and write
    # them, with their summary fields, to a json file.
    parser = ap()
    parser.add_argument('-i', type=str, required=True, help='Input File')
    parser.add_argument('--json', '-j', type=str, required=True,
                        help='Output JSON file')
    args = parser.parse_args()

    events = get_events(args.i)

    # Fill the metadata through place_events rather than repeating its dict
    # here, so this script and other callers of place_events always emit the
    # same set of core.* keys.
    metadata = {}
    place_events(events, metadata)

    output = json.dumps({'metadata': metadata})

    with open(args.json, 'w') as outfile:
        outfile.write(output)
93 changes: 93 additions & 0 deletions utilities/inherit_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from metacat.webapi import MetaCatClient
from argparse import ArgumentParser as ap
import json

# Metadata keys that must be present in a parent's metadata (enforced by
# check_md) and that get_parent_md / get_parent_md_from_json copy from the
# parent into the inherited metadata.
required_keys = [
    "core.data_stream",
    "core.file_content_status",
    "core.file_type",
    "core.run_type",
    "core.runs",
    "core.runs_subruns",
    "dune.daq_test",
    "retention.status",
    "retention.class",
]

def check_md(req_keys, parent_md, parent_name):
    """Verify that ``parent_md`` contains every key listed in ``req_keys``.

    Args:
        req_keys: Keys that must be present.
        parent_md: Metadata mapping to check.
        parent_name: Label for the parent; used only in the error message.

    Raises:
        Exception: If any key is missing. The message lists the missing keys
            in the order they appear in ``req_keys``.
    """
    missing = [key for key in req_keys if key not in parent_md]
    if not missing:
        return

    ##TODO -- better exception
    raise Exception(
        f'Error! The following required keys are missing from metadata of {parent_name}'
        '\n\t' + ', '.join(missing)
    )

def get_parent_md(parent_name):
    """Fetch the parent's metadata from MetaCat and keep the inherited keys.

    Args:
        parent_name: Parent file DID, passed as ``did`` to
            ``MetaCatClient.get_file``.

    Returns:
        dict mapping each entry of ``required_keys`` to the parent's value.

    Raises:
        LookupError: If MetaCat returns no record for ``parent_name``.
        Exception: From ``check_md`` when a required key is missing.
    """
    mc = MetaCatClient()
    parent_file = mc.get_file(did=parent_name, with_metadata=True,
                              with_provenance=False)

    # get_file gives back None for a file it cannot find; report that
    # explicitly instead of failing with a TypeError when subscripting None.
    if parent_file is None:
        raise LookupError(
            f'Error! Could not find parent file {parent_name} in MetaCat'
        )

    parent_md = parent_file['metadata']
    check_md(required_keys, parent_md, parent_name)
    return {rk: parent_md[rk] for rk in required_keys}

def get_parent_md_from_json(parent_md, parent_name='parent json'):
    """Select the inherited keys from metadata that was loaded from json.

    Args:
        parent_md: The parent's metadata mapping.
        parent_name: Label used in the error message when keys are missing.
            Previously the metadata dict itself was passed as the label, so
            the whole dict was printed in the error text.

    Returns:
        dict mapping each entry of ``required_keys`` to the parent's value.

    Raises:
        Exception: From ``check_md`` when a required key is missing.
    """
    check_md(required_keys, parent_md, parent_name)
    return {rk: parent_md[rk] for rk in required_keys}

def get_name_from_json(json_vals):
    """Pick the identifier used to reference the parent file.

    Preference order: ``did``, then the ``name``/``namespace`` pair (both
    must be present), then ``fid``. Only the chosen field(s) are returned.

    Raises:
        Exception: If none of those identifiers is present.
    """
    if 'did' in json_vals:
        return {'did': json_vals['did']}

    if 'name' in json_vals and 'namespace' in json_vals:
        return {key: json_vals[key] for key in ('name', 'namespace')}

    if 'fid' in json_vals:
        return {'fid': json_vals['fid']}

    raise Exception(
        'Error! Tried importing parent name from json but could not find field'
    )

def inherit_json(parent_json):
    """Build the inherited output from a parent's json metadata file.

    Args:
        parent_json: Path to a json file holding the parent's record; it must
            contain a ``metadata`` entry and one of the identifiers accepted
            by ``get_name_from_json``.

    Returns:
        dict with ``parents`` (a one-element list holding the parent
        identifier) and ``metadata`` (the inherited keys).
    """
    with open(parent_json, 'r') as parent_file:
        parent_record = json.load(parent_file)

    # Resolve the identifier first, then the metadata.
    parent_ref = get_name_from_json(parent_record)
    inherited = get_parent_md_from_json(parent_record['metadata'])
    return {'parents': [parent_ref], 'metadata': inherited}

def inherit(parent_name):
    """Build the inherited output for a parent identified by its DID.

    Args:
        parent_name: Parent file DID; also forwarded to ``get_parent_md``.

    Returns:
        dict with ``parents`` (a one-element list ``[{'did': parent_name}]``)
        and ``metadata`` (the result of ``get_parent_md``).
    """
    inherited = get_parent_md(parent_name)
    parent_ref = {'did': parent_name}
    return {'parents': [parent_ref], 'metadata': inherited}

if __name__ == '__main__':
    # Standalone use: inherit metadata from a parent DID and write it to a
    # json file.
    cli = ap()
    cli.add_argument('--parent', '-p', required=True, type=str,
                     help='Parent file did (namespace:name)')
    cli.add_argument('--json', '-j', required=True, type=str,
                     help='Output json file')
    args = cli.parse_args()

    # Look up the parent and serialize the result before the output file is
    # opened.
    serialized = json.dumps(inherit(args.parent), indent=2)

    # Write to the file named by --json.
    with open(args.json, "w") as outfile:
        outfile.write(serialized)

62 changes: 62 additions & 0 deletions utilities/meta_maker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import place_metadata, get_events_for_md, inherit_metadata, add_origins
from argparse import ArgumentParser as ap
import os
import json

if __name__ == '__main__':
    # Assemble one metadata json for a file from: the base command-line
    # fields, optionally the event numbers read from the file itself,
    # optionally metadata inherited from a parent, and optionally the origin
    # information of past applications.

    parser = ap()
    parser.add_argument('--file', '-f', required=True, type=str,
        help="File did for which we're making metadata (namespace:name)",
    )
    parser.add_argument('--get_events', action='store_true',
        help='Get event numbers from artroot file')
    place_metadata.base_args(parser)
    add_origins.add_args(parser)
    parser.add_argument('--parent', '-p', default=None, type=str,
        help='Parent DID to inherit from (namespace:name)\nOR parent json metadata (requires --parent_as_json)')
    parser.add_argument('--parent_as_json', action='store_true',
        help='Treat --parent as the path to a json metadata file instead of a DID')
    parser.add_argument('--json', '-j', required=True, type=str,
        help='Output json name')
    args = parser.parse_args()

    # Split on the first ':' only. Indexing the result of split(':') raised
    # IndexError when the colon was missing and silently dropped everything
    # after a second colon.
    namespace, separator, name = args.file.partition(':')
    if not separator or not namespace or not name:
        parser.error(
            f'--file must be of the form namespace:name, got {args.file!r}'
        )

    base_md = place_metadata.make_md_from_args(args)

    output = {
        'name': name,
        'namespace': namespace,
        'metadata': base_md,
    }

    if args.get_events:
        # The file is looked up by its bare name, i.e. relative to the
        # current working directory.
        if not os.path.isfile(output['name']):
            raise Exception(f'Want events but no file exists of name {output["name"]}')

        # Get the events from the file and put them in the metadata.
        events = get_events_for_md.get_events(output['name'])
        get_events_for_md.place_events(events, output['metadata'])

    if args.parent is not None:
        # Get the metadata from the parent, either from a json file or from
        # a DID lookup.
        if args.parent_as_json:
            results = inherit_metadata.inherit_json(args.parent)
        else:
            results = inherit_metadata.inherit(args.parent)

        # Inherited values overwrite base values that share a key.
        output['metadata'] |= results['metadata']

        # Place the parent info.
        output['parents'] = results['parents']

    if args.past_apps is not None:
        output['metadata'] |= add_origins.add_origins(args, args.app_version)

    # Write the output.
    output_json = json.dumps(output, indent=2)
    with open(args.json, 'w') as outfile:
        outfile.write(output_json)
35 changes: 35 additions & 0 deletions utilities/place_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import json
from argparse import ArgumentParser as ap

def make_md_from_args(args):
    """Build the base metadata dict from parsed command-line arguments.

    Args:
        args: Namespace providing ``file_format``, ``app_name``,
            ``app_family``, ``app_version``, ``data_tier``, ``fcl``,
            ``campaign``, ``start_time`` and ``end_time``.

    Returns:
        dict of ``core.*`` / ``dune.*`` fields. ``core.start_time`` and
        ``core.end_time`` are included only when the corresponding argument
        is not ``None``.
    """
    md = {
        'core.file_format': args.file_format,
        'core.application.name': args.app_name,
        'core.application.family': args.app_family,
        'core.application.version': args.app_version,
        'core.data_tier': args.data_tier,
        'dune.config_file': args.fcl,
        'dune.campaign': args.campaign,
    }

    # Handle the two timestamps independently. Gating both on start_time
    # wrote a None end time when only --start_time was given and discarded
    # --end_time when it was given alone.
    if args.start_time is not None:
        md['core.start_time'] = args.start_time
    if args.end_time is not None:
        md['core.end_time'] = args.end_time
    return md

def base_args(parser):
    """Register the base metadata options on ``parser``.

    ``--start_time`` and ``--end_time`` are floats defaulting to ``None``;
    the remaining options are strings. None of them is required.
    """
    for flag in ('--start_time', '--end_time'):
        parser.add_argument(flag, default=None, type=float)

    ##TODO -- make --file_format required
    string_flags = (
        '--file_format',
        '--app_family',
        '--app_name',
        '--app_version',
        '--data_tier',
        '--fcl',
        '--campaign',
    )
    for flag in string_flags:
        parser.add_argument(flag, type=str)

if __name__ == '__main__':
    # Standalone use: build the base metadata from the command line and emit
    # it as json.
    parser = ap()
    parser.add_argument('--json', '-j', type=str, help='Output JSON file')
    base_args(parser)
    args = parser.parse_args()

    # The parsed options used to be discarded, so the script produced
    # nothing and --json had no effect. Build the metadata and emit it.
    # NOTE(review): the top-level 'metadata' wrapper is an assumption about
    # the intended layout -- confirm before relying on this output.
    output = json.dumps({'metadata': make_md_from_args(args)}, indent=2)

    if args.json is None:
        # No output file requested: show the result on stdout.
        print(output)
    else:
        with open(args.json, 'w') as outfile:
            outfile.write(output)