Skip to content

Commit

Permalink
also change in rdd.py and AsyncRDD
Browse files Browse the repository at this point in the history
  • Loading branch information
yingjieMiao committed Oct 6, 2014
1 parent d31ff7e commit 1d2c410
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,9 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
if (results.size == 0) {
numPartsToTry = totalParts - 1
} else {
numPartsToTry = (1.5 * num * partsScanned / results.size).toInt
numPartsToTry = ((1.5 * num * partsScanned / results.size).toInt - partsScanned) max 1 // the left side of max is >=1 whenever partsScanned >= 2
}
}
numPartsToTry = math.max(0, numPartsToTry) // guard against negative num of partitions

val left = num - results.size
val p = partsScanned until math.min(partsScanned + numPartsToTry, totalParts)
Expand Down
3 changes: 2 additions & 1 deletion python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,8 @@ def take(self, num):
if len(items) == 0:
numPartsToTry = partsScanned * 4
else:
numPartsToTry = int(1.5 * num * partsScanned / len(items))
#the first paramter of max is >=1 whenever partsScanned >= 2
numPartsToTry = max(int(1.5 * num * partsScanned / len(items)) - partsScanned, 1)

left = num - len(items)

Expand Down

0 comments on commit 1d2c410

Please sign in to comment.