change to package
abalon is now a package
Tagar committed Nov 3, 2017
1 parent 90ca58a commit e82a845
Showing 4 changed files with 47 additions and 3 deletions.
3 changes: 3 additions & 0 deletions __init__.py
@@ -0,0 +1,3 @@

__all__ = ["pyspark"]

Empty file added abalon/__init__.py
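Taken together, the two __init__.py files above turn the code into an importable package; a minimal sketch of the resulting import (the alias is my own choice, to avoid shadowing the real pyspark library):

from abalon import pyspark as abalon_pyspark   # the module this commit moves into the package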
29 changes: 26 additions & 3 deletions pyspark.py → abalon/pyspark.py
@@ -1,10 +1,31 @@


###########################################################################################################

spark = None
debug = False

def pyspark_init (i_spark, i_debug):
    '''
    Initialize module-level variables
    :param i_spark: an object of pyspark.sql.session.SparkSession
    :param i_debug: debug output of the below functions?
    '''

    from pyspark.sql.session import SparkSession
    if not isinstance(i_spark, SparkSession):   # check the passed-in argument, not the still-None module global
        raise TypeError("i_spark parameter should be of type SparkSession")

    global spark, debug
    (spark, debug) = (i_spark, i_debug)
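A minimal usage sketch for the new initializer (hedged: the app name is hypothetical; SparkSession.builder.getOrCreate() is the standard pyspark entry point):

from pyspark.sql import SparkSession
from abalon import pyspark as abalon_pyspark

session = SparkSession.builder.appName("abalon-demo").getOrCreate()   # hypothetical app name
abalon_pyspark.pyspark_init(session, True)                            # True turns on debug output from the helpers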


###########################################################################################################


def file_to_df (df_name, file_path, header=True, delimiter='|', inferSchema=True, cache=False):

"""
Reads in a delimited file and sets up a Spark dataframe
@@ -32,6 +53,7 @@ def file_to_df (df_name, file_path, header=True, delimiter='|', inferSchema=True


def sql_to_df (df_name, sql, cache=False):

"""
Runs an sql query and sets up a Spark dataframe
@@ -51,6 +73,7 @@ def sql_to_df (df_name, sql, cache=False):
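Both dataframe helpers are collapsed in this view, so the following sketch leans only on the visible signatures and docstrings; the file path, names, and query are hypothetical, and the assumption that df_name registers the result for later SQL access is inferred rather than shown:

from abalon import pyspark as abalon_pyspark   # as in the earlier sketch; pyspark_init() assumed already called

abalon_pyspark.file_to_df("customers", "/data/customers.csv", header=True, delimiter="|", inferSchema=True, cache=True)
abalon_pyspark.sql_to_df("active_customers", "SELECT * FROM customers WHERE active = 1")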

###########################################################################################################


def copyMerge (src_dir, dst_file, overwrite=False, deleteSource=False):

"""
@@ -87,10 +110,10 @@ def debug_print (message):

    try:
        # loop over files in alphabetical order and append them one by one to the target file
-       for file in files:
-           debug_print("Appending file {} into {}".format(file, dst_file))
+       for filename in files:
+           debug_print("Appending file {} into {}".format(filename, dst_file))

-           in_stream = fs.open(file)        # InputStream object
+           in_stream = fs.open(filename)    # InputStream object
            try:
                hadoop.io.IOUtils.copyBytes(in_stream, out_stream, conf, False)   # False means don't close out_stream
            finally:
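A hedged sketch of copyMerge, which appends the files of an HDFS directory into a single target file (the paths are hypothetical, and pyspark_init() presumably has to run first, since the loop above reaches the Hadoop IOUtils API through the module-level session):

from abalon import pyspark as abalon_pyspark   # as in the earlier sketches

# merge the part-files of a Spark output directory into one HDFS file
abalon_pyspark.copyMerge("/tmp/output_dir", "/tmp/merged.csv", overwrite=True, deleteSource=False)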
18 changes: 18 additions & 0 deletions setup.py
@@ -0,0 +1,18 @@

from setuptools import setup, find_packages

# http://setuptools.readthedocs.io/en/latest/setuptools.html

setup(name='abalon',
      version='1.0',
      packages=find_packages(),

      install_requires=['docutils>=0.3'],

      # metadata for upload to PyPI
      description='Various utility functions for Apache Spark (pySpark)',
      url='https://github.com/Tagar/abalon',
      author='Ruslan Dautkhanov',
      author_email='Dautkhanov@gmail.com',
      license='Apache-2.0',
      )
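With this setup.py in place, the package can be installed from a checkout in the usual setuptools ways; a sketch, since the commit itself carries no install instructions:

pip install .                 # from the repo root
python setup.py install       # the classic setuptools route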
