-
Notifications
You must be signed in to change notification settings - Fork 67
/
14-2.0-Overview-Demo-Spark-Session.scala
executable file
·1 lines (1 loc) · 16.1 KB
/
14-2.0-Overview-Demo-Spark-Session.scala
1
{"version":"NotebookV1","origId":503877321546219,"name":"14-2.0-Overview-Demo-Spark-Session","language":"scala","commands":[{"version":"CommandV1","origId":503877321546221,"guid":"92b8322b-edf8-4770-b6af-f0191871193d","subtype":"command","commandType":"auto","position":1.0,"command":"%md\n# Demo 2. SparkSession - the new entry point","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"9de26ebc-e33a-45e5-a444-c384e44de601"},{"version":"CommandV1","origId":503877321546222,"guid":"0c30bfe1-e290-451c-8a65-06c14f51a26f","subtype":"command","commandType":"auto","position":2.0,"command":"import org.apache.spark.sql.Dataset\n\nimplicit class DatasetDisplay(ds: Dataset[_]) {\n def display(): Unit = {\n com.databricks.backend.daemon.driver.EnhancedRDDFunctions.display(ds)\n }\n}","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"2560484c-7473-4fb8-8663-830e85ef5a96"},{"version":"CommandV1","origId":503877321546223,"guid":"d1a6e7bc-f9d9-48dd-9c60-563c6052687d","subtype":"command","commandType":"auto","position":3.0,"command":"%md\n### Unified entry point for reading data","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"a2b4bfb6-a7a2-46e9-8b61-d3ca45ede0a9"},{"version":"CommandV1","origId":503877321546224,"guid":"0ed0f9ef-03c0-4427-a605-9dce55e50e95","subtype":"command","commandType":"auto","position":4.0,"command":"spark","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"751d7362-f6cb-4326-b57a-c8e2aebfb0bf"},{"version":"CommandV1","origId":503877321546225,"guid":"9692af2d-21af-4538-bf6f-4b5c15486dab","subtype":"command","commandType":"auto","position":5.0,"command":"val df = spark.read.option(\"header\", \"true\").option(\"inferSchema\", \"true\").csv(\"/databricks-datasets/samples/population-vs-price/data_geo.csv\")","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"336e67ee-8ac1-4f68-ae72-2c04c7992027"},{"version":"CommandV1","origId":503877321546226,"guid":"766ca615-509f-4c71-93ed-98257d81424e","subtype":"command","commandType":"auto","position":6.0,"command":"%md\n### Working with config options","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"0f6a9d17-e0a7-46ff-9759-d1d073e33c3b"},{"version":"CommandV1","origId":503877321546227,"guid":"eba0fc98-32f1-4874-aecb-c30f369b059a","subtype":"command","commandType":"auto","position":7.0,"command":"// The configs are mutable and can be used to toggle optimizer behavior.\n// Configs are also automatically propagated to Hadoop Configuration during I/O.\nspark.conf.set(\"spark.some.config\", \"abcd\")","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"72a85a93-938f-4d25-8e35-8208ce99916f"},{"version":"CommandV1","origId":503877321546228,"guid":"fd152163-6b7b-4dc3-b3e2-6b9bf6bde9ee","subtype":"command","commandType":"auto","position":8.0,"command":"spark.conf.get(\"spark.some.config\")","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"06732ada-5f4d-4fbf-ab0e-a5e6984bf8a6"},{"version":"CommandV1","origId":503877321546229,"guid":"d3bc17ac-a126-4713-8712-82799f036407","subtype":"command","commandType":"auto","position":9.0,"command":"spark.sql(\"select \\\"${spark.some.config}\\\"\").display()","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"dbc4d2f8-b2da-455f-9e72-d0a72660ffe2"},{"version":"CommandV1","origId":503877321546230,"guid":"a591c015-d8a5-4b18-85d5-af29b5fc9fa6","subtype":"command","commandType":"auto","position":10.0,"command":"%md\n### Running SQL over data","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"b3974af0-efb7-4015-b4e8-90bbb4db5552"},{"version":"CommandV1","origId":503877321546231,"guid":"cb61e302-4167-4ef1-8efb-033b1b0400a9","subtype":"command","commandType":"auto","position":11.0,"command":"df.withColumnRenamed(\"2014 rank\", \"rank\")\n .withColumnRenamed(\"State Code\", \"stateCode\")\n .withColumnRenamed(\"2014 Population Estimate\", \"popEstimate\")\n .withColumnRenamed(\"2015 median sales price\", \"medianSalePrice\")\n .write\n .saveAsTable(\"geodata\")","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"c6b5c3b6-1e71-4ae3-9ff0-0c9036d39386"},{"version":"CommandV1","origId":503877321546232,"guid":"894ec379-d098-4634-9944-f32fb979e4fc","subtype":"command","commandType":"auto","position":12.0,"command":"spark.sql(\"select * from geodata\").display()","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"2563880e-362e-4e53-a4d1-b092668dbf2b"},{"version":"CommandV1","origId":503877321546233,"guid":"ad8e7f61-3930-412b-bf89-9e3c70e4837e","subtype":"command","commandType":"auto","position":13.0,"command":"%md\n### Working with metadata directly","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"3db662c4-5b79-4830-a5fa-1c2e52560a61"},{"version":"CommandV1","origId":503877321546234,"guid":"4441d360-6918-4b51-99b5-90a7abb0b0e2","subtype":"command","commandType":"auto","position":14.0,"command":"spark.catalog.listTables().display()","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"613ff989-3cd5-440c-bb44-a711592840f7"},{"version":"CommandV1","origId":503877321546235,"guid":"eb837eb7-71b9-4b8f-b975-7673921bf9d3","subtype":"command","commandType":"auto","position":15.0,"command":"spark.catalog.listColumns(\"geodata\").display()","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"aa98c54e-44b7-4a97-b56e-16aee0d83c15"},{"version":"CommandV1","origId":503877321546236,"guid":"71552b1b-b688-4274-bf61-8fd78acd30ae","subtype":"command","commandType":"auto","position":16.0,"command":"%md\n### Also have access to the underlying SparkContext","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"dae2d240-9830-44cb-8067-a568d54e92c5"},{"version":"CommandV1","origId":503877321546237,"guid":"6044ccec-a8f9-4382-889b-2fcccbba84a5","subtype":"command","commandType":"auto","position":17.0,"command":"spark.sparkContext","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"934fe387-0618-4013-a15a-068666d5a8e1"},{"version":"CommandV1","origId":503877321546238,"guid":"e6634ab9-e2e2-424b-82df-9bb598530c9e","subtype":"command","commandType":"auto","position":18.0,"command":"","commandVersion":0,"state":"finished","results":null,"errorSummary":null,"error":null,"workflows":[],"startTime":0.0,"submitTime":0.0,"finishTime":0.0,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"streamStates":{},"nuid":"5c5cac89-e929-4270-a3d7-c0c787bd0e96"}],"dashboards":[],"guid":"18cc28ae-9988-4a8b-aba5-eec6ec38ce8d","globalVars":{},"iPythonMetadata":null,"inputWidgets":{}}