Skip to content

Commit

Permalink
bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Tagar committed Feb 18, 2018
1 parent 24cbb5c commit ea7523d
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
11 changes: 6 additions & 5 deletions abalon/spark/pivoter.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,9 @@ def AggSparkPivoter (df, all_vars=None, agg_op=operator.add):
'''

def agg_merge_two_dicts(x, y, agg_op):
    """Merge two dicts, combining the values of each key with ``agg_op``.

    The result contains every key present in either ``x`` or ``y``. For each
    key, the value from each dict (or 0 when the key is absent from that
    dict) is converted with ``float()`` and the two floats are passed to
    ``agg_op``; whatever ``agg_op`` returns is stored under that key.

    :param x: first dict of key -> value
    :param y: second dict of key -> value
    :param agg_op: two-argument callable applied as ``agg_op(x_val, y_val)``
    :return: new dict; neither input is modified
    :raises TypeError, ValueError: if a value cannot be converted by ``float()``
    """
    return {
        # a key missing on one side contributes 0 to the aggregation
        k: agg_op(float(x.get(k, 0)), float(y.get(k, 0)))
        for k in set(x).union(y)
    }

Expand All @@ -111,12 +112,12 @@ def map_dict_to_denseArray(idx, d):

spark = get_spark()

# assuming 2nd column is index column
idx_col = df.columns[1]
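# assuming column order is (index, key, value); NOTE(review): df.columns[0:1] yields only one name, so the two-name unpacking below needs df.columns[0:2]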
(idx_col, key_col) = df.columns[0:1]

if not all_vars:
# get list of variables from the dataset:
all_vars = sorted([row[0] for row in df.select(idx_col).distinct().collect()])
all_vars = sorted([row[0] for row in df.select(key_col).distinct().collect()])

pivoted_rdd = (df.rdd
.map(lambda (idx, k, v): (idx, {k: v})) # convert k,v to a 1-element dict
Expand Down
2 changes: 1 addition & 1 deletion abalon/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

version = '2.2.1'
version = '2.2.2'


0 comments on commit ea7523d

Please sign in to comment.