diff --git a/cmflib/cmfquery.py b/cmflib/cmfquery.py index 054569a2..e4c13cbe 100644 --- a/cmflib/cmfquery.py +++ b/cmflib/cmfquery.py @@ -617,6 +617,44 @@ def get_one_hop_parent_executions(self, execution_id: t.List[int], pipeline_id: list_exec.append(self.store.get_executions_by_id(exec)) return list_exec + def get_one_hop_parent_executions_ids(self, execution_id: t.List[int], pipeline_id: str = None) -> t.List[int]: + """Get parent execution ids for given execution id + Args: + execution_id : Execution id for which parent execution are required + It is passed in list, for example execution_id: [1] + pipeline_id : Pipeline id + Return: + Returns parent executions for given id + """ + artifacts: t.Optional = self._get_input_artifacts(execution_id) + if not artifacts: + return None + + exe_ids = [] + + for id in artifacts: + ids = self._get_executions_by_output_artifact_id(id, pipeline_id) + exe_ids.extend(ids) + return exe_ids + + def get_executions_with_execution_ids(self, exe_ids: t.List[int]): + """For list of execution ids it returns df with "id,Execution_type_name, Execution_uuid" + Args: + execution ids: List of execution ids. + Return: + ["id","Execution_type_name","Execution_uuid"] + """ + df = pd.DataFrame() + executions = self.store.get_executions_by_id(exe_ids) + for count, exe in enumerate(executions): + temp_dict = {} + temp_dict['id'] = exe_ids[count] + d1 = self._transform_to_dataframe(exe, temp_dict) + df = pd.concat([df, d1], sort=True, ignore_index=True) + df.drop_duplicates() + df = df[["id", "Execution_type_name","Execution_uuid"]] + return df + def get_one_hop_child_executions(self, execution_id: t.List[int]) -> t.List[int]: """Get artifacts produced by executions that consume given artifact. diff --git a/server/app/main.py b/server/app/main.py index 1dabe6c3..9af1e02c 100644 --- a/server/app/main.py +++ b/server/app/main.py @@ -21,6 +21,7 @@ ) from server.app.query_visualization import query_visualization from server.app.query_exec_lineage import query_exec_lineage +from server.app.query_tangled_lineage import query_tangled_lineage from pathlib import Path import os import json @@ -40,6 +41,7 @@ async def lifespan(app: FastAPI): dict_of_art_ids = await get_all_artifact_ids(server_store_path) # loaded execution ids with names into memory dict_of_exe_ids = await get_all_exe_ids(server_store_path) + yield dict_of_art_ids.clear() dict_of_exe_ids.clear() @@ -161,6 +163,7 @@ async def display_artifact_lineage(request: Request, pipeline_name: str): query = cmfquery.CmfQuery(server_store_path) if (pipeline_name in query.get_pipeline_names()): response=await get_lineage_data(server_store_path,pipeline_name,"Artifacts",dict_of_art_ids,dict_of_exe_ids) + #response = null return response else: return f"Pipeline name {pipeline_name} doesn't exist." @@ -203,6 +206,22 @@ async def display_exec_lineage(request: Request, exec_type: str, pipeline_name: else: response = None return response + +@app.get("/display_tree_lineage/{uuid}/{pipeline_name}") +async def display_tree_lineage(request: Request,uuid, pipeline_name: str): + ''' + returns dictionary of nodes and links for given execution_type. + response = { + nodes: [{id:"",name:"",execution_uuid:""}], + links: [{source:1,target:4},{}], + } + ''' + # checks if mlmd file exists on server + if os.path.exists(server_store_path): + query = cmfquery.CmfQuery(server_store_path) + if (pipeline_name in query.get_pipeline_names()): + response = await query_tangled_lineage(server_store_path, pipeline_name, dict_of_exe_ids,uuid) + return response # api to display artifacts available in mlmd @app.get("/display_artifacts/{pipeline_name}/{type}") diff --git a/server/app/query_tangled_lineage.py b/server/app/query_tangled_lineage.py new file mode 100644 index 00000000..74a95f8e --- /dev/null +++ b/server/app/query_tangled_lineage.py @@ -0,0 +1,90 @@ +import os +from cmflib import cmfquery +from collections import deque, defaultdict +import pandas as pd + +async def query_tangled_lineage(mlmd_path,pipeline_name, dict_of_exe_id,uuid): + query = cmfquery.CmfQuery(mlmd_path) + pipeline_id = query.get_pipeline_id(pipeline_name) + df=dict_of_exe_id[pipeline_name] + + #finding execution_id by comparing Execution_uuid (d09fdb26-0e9d-11ef-944f-4bf54f5aca7f) and uuid ('Prepare_u3tr') + result = df[df['Execution_uuid'].str[:4] == uuid] #result = df[id: "1","Execution_type_name", "Execution_uuid"] + execution_id=result["id"].tolist() + parents_set = set() + queue = deque() + df = pd.DataFrame() + + parents = query.get_one_hop_parent_executions_ids(execution_id,pipeline_id) #list if parent execution ids + dict_parents = {} + if parents == None: + parents = [] + dict_parents[execution_id[0]] = list(set(parents)) # [2] = [1,2,3,4] list of parent id + parents_set.add(execution_id[0]) #created so that we can directly find execuions using execution ids + for i in set(parents): + queue.append(i) + parents_set.add(i) + while len(queue) > 0: + exe_id = queue.popleft() + parents = query.get_one_hop_parent_executions_ids([exe_id],pipeline_id) + if parents == None: + parents = [] + dict_parents[exe_id] = list(set(parents)) + for i in set(parents): + queue.append(i) + parents_set.add(i) + + df = query.get_executions_with_execution_ids(list(parents_set)) # for execution_id get executions(complete df with all data of executions) + + df['name_uuid'] = df['Execution_type_name'] + '_' + df['Execution_uuid'] + result_dict = df.set_index('id')['name_uuid'].to_dict() # {"id" : "name_uuid"} for example {"2":"Prepare_d09fdb26-0e9d-11ef-944f-4bf54f5aca7f"} + + data_organized = topological_sort(dict_parents,result_dict) # it will use topological sort to create data from parents to child pattern + """ + data_organized format + [[{'id': 'Prepare_d09f', 'parents': []}], + [{'id': 'Featurize_fae6', 'parents': ['Prepare_d09f']}], + [{'id': 'Train_7fe7', 'parents': ['Featurize_fae6']}]] + """ + return data_organized + +def topological_sort(input_data,execution_id_dict): + # Initialize in-degree of all nodes to 0 + in_degree = {node: 0 for node in input_data} + # Initialize adjacency list + adj_list = defaultdict(list) + + # Fill the adjacency list and in-degree dictionary + for node, dependencies in input_data.items(): + for dep in dependencies: + adj_list[dep].append(node) + in_degree[node] += 1 + + # Queue for nodes with in-degree 0 + zero_in_degree_queue = deque([node for node, degree in in_degree.items() if degree == 0]) + topo_sorted_nodes = [] + + while zero_in_degree_queue: + current_node = zero_in_degree_queue.popleft() + topo_sorted_nodes.append(current_node) + for neighbor in adj_list[current_node]: + in_degree[neighbor] -= 1 + if in_degree[neighbor] == 0: + zero_in_degree_queue.append(neighbor) + # Transform sorted nodes into the required output format + parent_dict = defaultdict(list) + # creating list of list which contains dictionary of {"id":1,parents:"execution_name"} + for id_val in topo_sorted_nodes: # topo_sorted_nodes = ['1','2','3','4'] + if id_val in input_data: # input_data = {"child_id":[parents_id]}, for example {"4":['3','7','9']} + parents = tuple(sorted(input_data[id_val])) + # {tuple(parents): {'id':execution_name,'parents':["exec_1","exec_2","exec_3"]} + # append id,parents to key with same parents to get all child in same list + parent_dict[parents].append({'id': modify_exec_name(execution_id_dict[id_val]),'parents': [modify_exec_name(execution_id_dict[parent]) for parent in input_data[id_val]]}) + output_data= list(parent_dict.values()) + return output_data + +def modify_exec_name(exec_name_uuid): + after_first_slash=exec_name_uuid.split('/', 1)[1] + name='_'.join(after_first_slash.rsplit('_', 1)[:-1])# 'Test-env/Prepare_d09fdb26-0e9d-11ef-944f-4bf54f5aca7f' ---> Prepare + uuid=exec_name_uuid.split('_')[-1].split('-')[0][:4] # 'Test-env/Prepare_d09fdb26-0e9d-11ef-944f-4bf54f5aca7f' ---> d09f + return (name +"_"+uuid) # Prepare_d09f diff --git a/server/app/query_visualization_execution.py b/server/app/query_visualization_execution.py index 508f5c70..cbe96b62 100644 --- a/server/app/query_visualization_execution.py +++ b/server/app/query_visualization_execution.py @@ -16,7 +16,7 @@ def query_visualization_execution(mlmd_path, pipeline_name, dict_of_art_ids, dic list_of_exec = dict_of_exe_ids[pipeline_name]["Context_Type"].tolist() list_of_uuid = dict_of_exe_ids[pipeline_name]["Execution_uuid"].tolist() for exec_type, uuid in zip(list_of_exec, list_of_uuid): - list_of_exec_uuid.append(exec_type + "_" + uuid.split("-")[0][:4]) + list_of_exec_uuid.append(exec_type.split("/",1)[1] + "_" + uuid.split("-")[0][:4]) return list_of_exec_uuid #print(query_visualization_execution("/home/chobey/cmf-server/data/mlmd","image")) diff --git a/ui/Dockerfile b/ui/Dockerfile index 83bf67bc..f65a618e 100644 --- a/ui/Dockerfile +++ b/ui/Dockerfile @@ -4,6 +4,7 @@ ENV PATH /app/node_modules/.bin:$PATH COPY package.json ./ COPY package-lock.json ./ RUN npm install --silent +RUN npm install svg.js RUN npm install d3 RUN npm install react-scripts@5.0.1 -g --silent COPY . ./ diff --git a/ui/src/client.js b/ui/src/client.js index 1ce882ac..a439c7c6 100644 --- a/ui/src/client.js +++ b/ui/src/client.js @@ -84,6 +84,14 @@ class FastAPIClient { }); } + async getExecTreeLineage(pipeline,uuid) { + return this.apiClient.get(`/display_tree_lineage/${uuid}/${pipeline}`) + .then(({ data }) => { + return data; + }); + } + + async getExecutions(pipelineName, page, sortField, sortOrder , filterBy, filterValue) { return this.apiClient .get(`/display_executions/${pipelineName}`, { diff --git a/ui/src/components/ExecutionTangledDropdown/index.css b/ui/src/components/ExecutionTangledDropdown/index.css new file mode 100644 index 00000000..c9cfb174 --- /dev/null +++ b/ui/src/components/ExecutionTangledDropdown/index.css @@ -0,0 +1,21 @@ +.dropdown { + position: relative; + display: inline-block; +} + +.dropdown-select { + appearance: none; + padding: 8px 16px; + font-size: 14px; + border: 1px solid #ccc; + border-radius: 4px; + outline: none; + cursor: pointer; + transition: border-color 0.3s ease; +} + +.dropdown-select:hover, +.dropdown-select:focus { + border-color: #4a90e2; +} + diff --git a/ui/src/components/ExecutionTangledDropdown/index.jsx b/ui/src/components/ExecutionTangledDropdown/index.jsx new file mode 100644 index 00000000..5e2898b5 --- /dev/null +++ b/ui/src/components/ExecutionTangledDropdown/index.jsx @@ -0,0 +1,37 @@ +import React, { useState,useEffect } from "react"; + +const ExecutionTangledDropdown = ({data,exec_type,handleTreeClick}) => { + const [selectedExecutionType, setSelectedExecutionType] = useState(''); + + useEffect(() => { + if (exec_type) { + setSelectedExecutionType(exec_type); + } + }, [exec_type]); + + const handleCallExecutionClick = (event) => { + handleTreeClick(event.target.value); + }; + + return ( +
+ +
+ + ); +}; + +export default ExecutionTangledDropdown; + diff --git a/ui/src/components/ExecutionTree/index.jsx b/ui/src/components/ExecutionTree/index.jsx new file mode 100644 index 00000000..1ed8cb50 --- /dev/null +++ b/ui/src/components/ExecutionTree/index.jsx @@ -0,0 +1,252 @@ +import React, { useEffect, useRef, useState } from 'react'; +import * as d3 from 'd3'; +import _ from 'lodash'; + +let tangled_width; +let tangled_height; + +const constructTangleLayout = (levels, options = {}) => { + // The layout calculation logic remains the same + levels.forEach((l, i) => l.forEach(n => (n.level = i))); + var nodes = levels.reduce((a, x) => a.concat(x), []); + var nodes_index = {}; + nodes.forEach(d => (nodes_index[d.id] = d)); + + nodes.forEach(d => { + d.parents = (d.parents === undefined ? [] : d.parents).map(p => nodes_index[p]); + }); + + levels.forEach((l, i) => { + var index = {}; + l.forEach(n => { + if (n.parents.length === 0) { + return; + } + + var id = n.parents + .map(d => d.id) + .sort() + .join('-X-'); + if (id in index) { + index[id].parents = index[id].parents.concat(n.parents); + } else { + index[id] = { id: id, parents: n.parents.slice(), level: i, span: i - d3.min(n.parents, p => p.level) }; + } + n.bundle = index[id]; + }); + l.bundles = Object.keys(index).map(k => index[k]); + l.bundles.forEach((b, i) => (b.i = i)); + }); + + var links = []; + nodes.forEach(d => { + d.parents.forEach(p => links.push({ source: d, bundle: d.bundle, target: p })); + }); + + var bundles = levels.reduce((a, x) => a.concat(x.bundles), []); + + bundles.forEach(b => + b.parents.forEach(p => { + if (p.bundles_index === undefined) { + p.bundles_index = {}; + } + if (!(b.id in p.bundles_index)) { + p.bundles_index[b.id] = []; + } + p.bundles_index[b.id].push(b); + }) + ); + + nodes.forEach(n => { + if (n.bundles_index !== undefined) { + n.bundles = Object.keys(n.bundles_index).map(k => n.bundles_index[k]); + } else { + n.bundles_index = {}; + n.bundles = []; + } + n.bundles.sort((a, b) => d3.descending(d3.max(a, d => d.span), d3.max(b, d => d.span))); + n.bundles.forEach((b, i) => (b.i = i)); + }); + + links.forEach(l => { + if (l.bundle.links === undefined) { + l.bundle.links = []; + } + l.bundle.links.push(l); + }); + + const padding = 8; + const node_height = 22; + const node_width = 70; + const bundle_width = 14; + const level_y_padding = 16; + const metro_d = 4; + const min_family_height = 22; + + + options.c ||= 16; + const c = options.c; + options.bigc ||= node_width + c; + + nodes.forEach(n => (n.height = (Math.max(1, n.bundles.length) - 1) * metro_d)); + + var x_offset = padding; + var y_offset = padding; + levels.forEach(l => { + x_offset += l.bundles.length * bundle_width; + y_offset += level_y_padding; + l.forEach((n, i) => { + n.x = n.level * node_width + x_offset; + n.y = node_height + y_offset + n.height / 2; + + y_offset += node_height + n.height; + }); + }); + + var i = 0; + levels.forEach(l => { + l.bundles.forEach(b => { + b.x = d3.max(b.parents, d => d.x) + node_width + (l.bundles.length - 1 - b.i) * bundle_width; + b.y = i * node_height; + }); + i += l.length; + }); + + links.forEach(l => { + l.xt = l.target.x; + l.yt = l.target.y + l.target.bundles_index[l.bundle.id].i * metro_d - (l.target.bundles.length * metro_d) / 2 + metro_d / 2; + l.xb = l.bundle.x; + l.yb = l.bundle.y; + l.xs = l.source.x; + l.ys = l.source.y; + }); + + var y_negative_offset = 0; + levels.forEach(l => { + y_negative_offset += -min_family_height + (d3.min(l.bundles, b => d3.min(b.links, link => link.ys - 2 * c - (link.yt + c))) || 0); + l.forEach(n => (n.y -= y_negative_offset)); + }); + + links.forEach(l => { + l.yt = l.target.y + l.target.bundles_index[l.bundle.id].i * metro_d - (l.target.bundles.length * metro_d) / 2 + metro_d / 2; + l.ys = l.source.y; + l.c1 = l.source.level - l.target.level > 1 ? Math.min(options.bigc, l.xb - l.xt, l.yb - l.yt) - c : c; + l.c2 = c; + }); + + var layout = { + width: d3.max(nodes, n => n.x) + node_width + 2 * padding, + height: d3.max(nodes, n => n.y) + node_height / 2 + 2 * padding, + node_height, + node_width, + bundle_width, + level_y_padding, + metro_d + }; + return { levels, nodes, nodes_index, links, bundles, layout }; +}; + +const renderChart = (data, options = {}) => { + options.color ||= (d, i) => options.color(i); // Default color function + options.background_color ||= 'white'; // Default background color + + const tangleLayout = constructTangleLayout(_.cloneDeep(data), options); + tangled_width = tangleLayout.layout.width; + tangled_height = tangleLayout.layout.height; + const textPadding = 12; + const labelOffset = 4; + return ( + <> + + + {tangleLayout.bundles.map((b, i) => { + let d = b.links + .map( + l => ` + M${l.xt + textPadding} ${l.yt + textPadding} + L${l.xb - l.c1 + textPadding} ${l.yt + textPadding} + A${l.c1} ${l.c1} 90 0 1 ${l.xb + textPadding} ${l.yt + l.c1 + textPadding} + L${l.xb + textPadding} ${l.ys - l.c2 + textPadding} + A${l.c2} ${l.c2} 90 0 0 ${l.xb + l.c2 + textPadding} ${l.ys + textPadding} + L${l.xs + textPadding} ${l.ys + textPadding}` + ) + .join(''); + return ( + + + + + ); + })} + {tangleLayout.nodes.map(n => ( + + + + + {n.id} + + + {n.id} + + ))} + + + ); +}; + +const ExecutionTree = ({ data }) => { + const chartContainerRef = useRef(null); + const [chart, setChart] = useState(null); + + useEffect(() => { + if (!data) { + // Data is not yet available + return; + } + const options = { + color: d3.scaleOrdinal(d3.schemeDark2), // Use provided color scale + background_color: 'white' // Provided background color + }; + const renderedChart = renderChart(data, options); + setChart(renderedChart); + }, [data]); + + return ( +
+ {chart} +
+ ); +}; + +export default ExecutionTree; diff --git a/ui/src/pages/lineage/index.jsx b/ui/src/pages/lineage/index.jsx index bd498f1c..d765c2ab 100644 --- a/ui/src/pages/lineage/index.jsx +++ b/ui/src/pages/lineage/index.jsx @@ -24,12 +24,15 @@ import LineageSidebar from "../../components/LineageSidebar"; import LineageTypeSidebar from "./LineageTypeSidebar"; import LineageArtifacts from "../../components/LineageArtifacts"; import ExecutionDropdown from "../../components/ExecutionDropdown"; +import ExecutionTree from "../../components/ExecutionTree"; +import ExecutionTangledDropdown from "../../components/ExecutionTangledDropdown"; + const client = new FastAPIClient(config); const Lineage = () => { const [pipelines, setPipelines] = useState([]); const [selectedPipeline, setSelectedPipeline] = useState(null); - const LineageTypes=['Artifacts','Execution']; + const LineageTypes=['Artifacts','Execution','Tangled_Exec']; const [selectedLineageType, setSelectedLineageType] = useState('Artifacts'); const [selectedExecutionType, setSelectedExecutionType] = useState(null); const [lineageData, setLineageData]=useState(null); @@ -69,7 +72,7 @@ const Lineage = () => { fetchArtifactLineage(pipeline); } else { - fetchExecutionTypes(pipeline); + fetchExecutionTypes(pipeline, selectedLineageType); }} }; @@ -80,8 +83,11 @@ const Lineage = () => { if (lineageType === "Artifacts") { fetchArtifactLineage(selectedPipeline); } + else if (lineageType === "Execution") { + fetchExecutionTypes(selectedPipeline, lineageType); + } else { - fetchExecutionTypes(selectedPipeline); + fetchExecutionTypes(selectedPipeline, lineageType); } }; @@ -96,18 +102,24 @@ const Lineage = () => { setLineageArtifactsKey((prevKey) => prevKey + 1); }; - const fetchExecutionTypes = (pipelineName) => { + const fetchExecutionTypes = (pipelineName, lineageType) => { client.getExecutionTypes(pipelineName).then((data) => { if (data === null ) { setExecDropdownData(null); } else { setExecDropdownData(data); - setSelectedExecutionType(data[0]); - const typeParts = data[0].split('/'); - const exec_type = typeParts[1].split('_')[0]; - const uuid= typeParts[1].split('_').slice(-1)[0]; - fetchExecutionLineage(pipelineName, exec_type,uuid); + setSelectedExecutionType(data[0]); // data[0] = "Prepare_3f45" + // method used such that even with multiple "_" it will get right execution_name and uuid + const last_underscore_index = data[0].lastIndexOf('_'); + const exec_type = data[0].substring(0, last_underscore_index); // Prepare + const uuid= (data[0].split("_").pop()); // 3f45 + if (lineageType === "Execution") { + fetchExecutionLineage(pipelineName, exec_type,uuid); + } + else { + fetchExecTree(pipelineName,uuid); + } } }); @@ -117,11 +129,20 @@ const Lineage = () => { // used for execution drop down const handleExecutionClick = (executionType) => { setExecutionData(null); + setSelectedExecutionType(executionType); - const typeParts = executionType.split('/'); - const type = typeParts[1].split('_')[0]; - const uuid= typeParts[1].split('_').slice(-1)[0]; - fetchExecutionLineage(selectedPipeline, type,uuid); + const last_underscore_index = executionType.lastIndexOf('_'); + const exec_type = executionType.substring(0, last_underscore_index); + const uuid= (executionType.split("_").pop()); + fetchExecutionLineage(selectedPipeline, exec_type,uuid); + }; + + // used for execution drop down + const handleTreeClick = (executionType) => { + setExecutionData(null); + setSelectedExecutionType(executionType); + const uuid= (executionType.split("_").pop()); + fetchExecTree(selectedPipeline, uuid); }; const fetchExecutionLineage = (pipelineName, type,uuid) => { @@ -133,6 +154,12 @@ const Lineage = () => { }); }; + const fetchExecTree = (pipelineName,exec_type) => { + client.getExecTreeLineage(pipelineName,exec_type).then((data) => { + setExecutionData(data); + }); + }; + return ( <>
{ )} + {selectedPipeline !== null && selectedLineageType === "Tangled_Exec" && execDropdownData !== null &&( +
+ +
+ )} + {selectedPipeline !== null && selectedLineageType === "Tangled_Exec" && execDropdownData !== null && executionData !== null &&( +
+ +
+ )} + +