Skip to content

Commit

Permalink
add hdfs_file_size()
Browse files Browse the repository at this point in the history
  • Loading branch information
Tagar committed Mar 15, 2018
1 parent 7aff06b commit e2ff46c
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 5 deletions.
7 changes: 4 additions & 3 deletions abalon/spark/pivoter.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,14 @@ def AggSparkPivoter (df, all_vars=None, agg_op=operator.add):
:param all_vars: list of all distinct values of `colname` column;
if not specified, datset will be scanned for all possible colnames;
the only reason it's passed to this function is so you can redefine order of pivoted columns;
:param agg_op: how to combine values when merging records for the same index (by default +)
:return: resulting dataframe
'''

def agg_merge_two_dicts(x, y, agg_op):
z = {k: agg_op(float(x.get(k, 0)),
float(y.get(k, 0))
)
z = {k: agg_op( float(x.get(k, 0))
, float(y.get(k, 0))
)
for k in set(x).union(y)
}
return z
Expand Down
18 changes: 17 additions & 1 deletion abalon/spark/sparkutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,22 @@ def hdfs_rename (src_name, dst_name):
return fs.rename(hadoop.fs.Path(src_name), hadoop.fs.Path(dst_name))


def hdfs_file_size (file_path):
'''
Returns file size of an HDFS file exists
:param file_path: file patch
:return: file size
'''

sparkutils_init()

# See https://hadoop.apache.org/docs/r2.8.2/api/org/apache/hadoop/fs/ContentSummary.html
# - getLength()
return fs.getContentSummary(hadoop.fs.Path(file_path)).getLength()



###########################################################################################################

def HDFScopyMerge (src_dir, dst_file, overwrite=False, deleteSource=False):
Expand Down Expand Up @@ -403,7 +419,7 @@ def HDFSwriteString (dst_file, content, overwrite=True, appendEOL=True):


def dataframeToHDFSfile (dataframe, dst_file, overwrite=False
, header='true', delimiter=','
, header=True, delimiter=','
, quoteMode='MINIMAL'
, quote='"', escape='\\'
):
Expand Down
2 changes: 1 addition & 1 deletion abalon/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

version = '2.2.5'
version = '2.2.6'


0 comments on commit e2ff46c

Please sign in to comment.