Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Closes #2716) Add support for module-inlining calls to polymorphic kernels/routines #2732

Draft
wants to merge 35 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
3bbdef5
#2716 add initial fix and test [skip ci]
arporter Sep 17, 2024
459e26d
#2716 fix linting
arporter Sep 17, 2024
37188bf
#2716 rm check for polymorphic kernels, ensure renamed kern is public
arporter Sep 18, 2024
4aa1c0c
#2716 fix linting
arporter Sep 18, 2024
dd29ca3
Merge branch 'master' into 2716_transform_interface_bug
arporter Sep 27, 2024
4383c7c
#2716 WIP exploring options
arporter Sep 27, 2024
60f4add
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 2, 2024
62b5da5
#2716 WIP plumbing-in inlining of multiple kernel routines
arporter Oct 2, 2024
5a10b88
#2716 more fixes [skip ci]
arporter Oct 2, 2024
d217661
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 3, 2024
cc657a9
#2716 get KernelModuleInlineTrans tests working [skip ci]
arporter Oct 3, 2024
08f18c8
#2716 fix linting
arporter Oct 3, 2024
53147c6
#2716 more linting
arporter Oct 3, 2024
4198608
#2716 fix a lot of tests
arporter Oct 3, 2024
e079988
#2716 fix remaining tests
arporter Oct 3, 2024
ca153a4
#2716 fix examples
arporter Oct 3, 2024
e35bcb2
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 3, 2024
71a2630
#2716 revert some unnecessary changes
arporter Oct 3, 2024
ba58c82
#2716 tidying and improving comments/docstrings
arporter Oct 4, 2024
2ac83b5
#2716 add tests for KernelModuleInlineTrans
arporter Oct 7, 2024
e5d5699
#2716 fix coverage of gocean_move_iteration_boundaries_inside
arporter Oct 7, 2024
f026e21
#2716 rm need for polymorphic checks for GOcean Kernels
arporter Oct 7, 2024
6925697
#2716 improve coverage
arporter Oct 7, 2024
0b6ce32
#2716 improve _rm_imported_symbol and only attempt to add interface s…
arporter Oct 8, 2024
a00f367
#2716 add InterfaceDeclGen to f2pygen
arporter Oct 8, 2024
2b82201
#2716 fixes for the transformation in LFRic
arporter Oct 8, 2024
b9987fc
Merge branch 'master' into 2716_transform_interface_bug
arporter Oct 8, 2024
0fd9464
#2716 fix tests broken by merge
arporter Oct 8, 2024
2e76d3a
#2716 update opt script in repo and fix OMPDeclareTargetTrans
arporter Oct 8, 2024
721ff5d
#2716 mark MATMUL as available on GPU
arporter Oct 8, 2024
8319e95
#2716 fix test for matmul on gpu
arporter Oct 8, 2024
33376ff
#2716 ensure Kern points to inlined PSyIR after transformation [skip ci]
arporter Oct 9, 2024
e8b3c0b
#2716 improvements to validation of calls that resolve to multiple ro…
arporter Oct 10, 2024
0903b0a
#2716 add new inlining test
arporter Oct 10, 2024
a8d357d
#2716 add new test source file
arporter Oct 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions examples/gocean/eg3/ocl_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@
''' Module providing a transformation script that converts the Schedule of
the first Invoke to use OpenCL. '''

from psyclone.psyir.transformations import \
FoldConditionalReturnExpressionsTrans
from psyclone.domain.gocean.transformations import GOOpenCLTrans, \
GOMoveIterationBoundariesInsideKernelTrans
from psyclone.psyir.transformations import (
FoldConditionalReturnExpressionsTrans)
from psyclone.domain.gocean.transformations import (
GOOpenCLTrans, GOMoveIterationBoundariesInsideKernelTrans)


def trans(psy):
Expand Down Expand Up @@ -68,7 +68,10 @@ def trans(psy):
move_boundaries_trans.apply(kern)
# Change the syntax to remove the return statements introduced by the
# previous transformation
fold_trans.apply(kern.get_kernel_schedule())
_, kschedules = kern.get_kernel_schedule()
# NOTE: we assume the kernel is not polymorphic and thus there is
# only one schedule associated with it.
fold_trans.apply(kschedules[0])
# Specify the OpenCL queue and workgroup size of the kernel
# In this case we dispatch each kernel in a different queue to check
# that the output code has the necessary barriers to guarantee the
Expand Down
7 changes: 5 additions & 2 deletions examples/lfric/eg15/matvec_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@


def trans(psy):
'''PSyclone transformation script for the Dynamo0.3 API to optimise
'''PSyclone transformation script for the LFRic API to optimise
the matvec kernel for many-core CPUs. For the moment simply find
the first matvec kernel in the example, transform the matmul
intrinsic to equivalent inline code and then print out its PSyIR
Expand All @@ -90,7 +90,10 @@ def trans(psy):
schedule = invoke.schedule
for kernel in schedule.coded_kernels():
if kernel.name.lower() == "matrix_vector_kernel_code":
kernel_schedule = kernel.get_kernel_schedule()
_, kernel_schedules = kernel.get_kernel_schedule()
# For simplicity, ASSUME that the kernel is not polymorphic and
# thus only has one schedule.
kernel_schedule = kernel_schedules[0]
# Replace matmul with inline code
for icall in kernel_schedule.walk(IntrinsicCall):
if icall.intrinsic is IntrinsicCall.Intrinsic.MATMUL:
Expand Down
11 changes: 10 additions & 1 deletion examples/lfric/scripts/gpu_offloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
'''
import os
import sys
from psyclone.domain.common.transformations import KernelModuleInlineTrans
from psyclone.domain.lfric import LFRicConstants
from psyclone.psyir.nodes import Directive, Loop
from psyclone.psyir.transformations import (
Expand Down Expand Up @@ -73,6 +74,7 @@ def trans(psy):
otrans = Dynamo0p3OMPLoopTrans()
const = LFRicConstants()
cpu_parallel = OMPParallelTrans()
intrans = KernelModuleInlineTrans()

if OFFLOAD_DIRECTIVES == "omp":
# Use OpenMP offloading
Expand Down Expand Up @@ -119,7 +121,8 @@ def trans(psy):
else:
offload = True

# Keep a record of any kernels we fail to offload
# Keep a record of any kernels we fail to offload.
failed_inline = set()
failed_to_offload = set()

# Colour loops over cells unless they are on discontinuous spaces
Expand All @@ -136,6 +139,12 @@ def trans(psy):
if loop.iteration_space == "cell_column":
if offload:
for kern in loop.kernels():
try:
intrans.apply(kern)
except TransformationError as err:
failed_inline.add(kern.name.lower())
print(f"Failed to module-inline kernel "
f"'{kern.name}' due to:\n{err.value}")
try:
gpu_annotation_trans.apply(kern)
except TransformationError as err:
Expand Down
11 changes: 6 additions & 5 deletions examples/lfric/scripts/kernel_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,12 @@ def trans(psy):
# Loop over all of the Kernels in this Schedule.
for kernel in schedule.coded_kernels():
try:
kernel_schedule = kernel.get_kernel_schedule()
if kernel_schedule not in already_printed:
kern = fortran_writer(kernel_schedule)
print(kern)
already_printed.append(kernel_schedule)
_, kernel_schedules = kernel.get_kernel_schedule()
for ksched in kernel_schedules:
if ksched not in already_printed:
kern = fortran_writer(ksched)
print(kern)
already_printed.append(ksched)
except Exception as err: # pylint: disable=broad-except
print(f"Code of '{kernel.name}' in '{invoke.name}' "
f"cannot be printed because:\n{err}")
Expand Down
4 changes: 3 additions & 1 deletion examples/xdsl/backend/xdsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,9 @@ def checkIfStringIsType(self, string, typ):

def nemokern_node(self, node):
exec_statements = []
schedule = node.get_kernel_schedule()
_, schedules = node.get_kernel_schedule()
# NOTE: polymorphic routines are ignored - only the first schedule is used.
schedule = schedules[0]
for child in schedule.children:
exec_statements.append(self._visit(child))
return exec_statements
Expand Down
Loading
Loading