Skip to content

Commit

Permalink
Merge pull request #333 from jwittenbach/fromrdd-ordered
Browse files Browse the repository at this point in the history
Adds the ability to specify, when calling fromrdd, whether or not the RDD is ordered by key
  • Loading branch information
freeman-lab committed Jun 15, 2016
2 parents 9e5c6c9 + a5742c3 commit 9599cda
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
7 changes: 5 additions & 2 deletions thunder/images/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
spark = check_spark()


def fromrdd(rdd, dims=None, nrecords=None, dtype=None, labels=None):
def fromrdd(rdd, dims=None, nrecords=None, dtype=None, labels=None, ordered=False):
"""
Load images from a Spark RDD.
Expand All @@ -31,6 +31,9 @@ def fromrdd(rdd, dims=None, nrecords=None, dtype=None, labels=None):
labels : array, optional, default = None
Labels for records. If provided, should be one-dimensional.
ordered : boolean, optional, default = False
Whether or not the rdd is ordered by key
"""
from .images import Images
from bolt.spark.array import BoltArraySpark
Expand All @@ -49,7 +52,7 @@ def process_keys(record):
k = (k,)
return k, v

values = BoltArraySpark(rdd.map(process_keys), shape=(nrecords,) + tuple(dims), dtype=dtype, split=1)
values = BoltArraySpark(rdd.map(process_keys), shape=(nrecords,) + tuple(dims), dtype=dtype, split=1, ordered=ordered)
return Images(values, labels=labels)

def fromarray(values, labels=None, npartitions=None, engine=None):
Expand Down
7 changes: 5 additions & 2 deletions thunder/series/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
spark = check_spark()


def fromrdd(rdd, nrecords=None, shape=None, index=None, labels=None, dtype=None):
def fromrdd(rdd, nrecords=None, shape=None, index=None, labels=None, dtype=None, ordered=False):
"""
Load series data from a Spark RDD.
Expand All @@ -37,6 +37,9 @@ def fromrdd(rdd, nrecords=None, shape=None, index=None, labels=None, dtype=None)
dtype : string, default = None
Data numerical type (if provided will avoid check)
ordered : boolean, optional, default = False
Whether or not the rdd is ordered by key
"""
from .series import Series
from bolt.spark.array import BoltArraySpark
Expand Down Expand Up @@ -65,7 +68,7 @@ def process_keys(record):
k = (k,)
return k, v

values = BoltArraySpark(rdd.map(process_keys), shape=shape, dtype=dtype, split=len(shape)-1)
values = BoltArraySpark(rdd.map(process_keys), shape=shape, dtype=dtype, split=len(shape)-1, ordered=ordered)
return Series(values, index=index, labels=labels)

def fromarray(values, index=None, labels=None, npartitions=None, engine=None):
Expand Down

0 comments on commit 9599cda

Please sign in to comment.