22/02/23 06:22:43 INFO ClusterLoadMonitor: Added query with execution ID:8. Current active queries:1 22/02/23 06:22:43 INFO OpenLineageSparkListener: WILLJ: Starting up a SparkSQLExecStart with executionid |8| 22/02/23 06:22:43 INFO ContextFactory: Creating a new execution context for executionid |8| 22/02/23 06:22:43 INFO ContextFactory: WILLJ: Simple String == Physical Plan == Execute SaveIntoDataSourceCommand +- SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:22:43 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute SaveIntoDataSourceCommand +- SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:22:43 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute SaveIntoDataSourceCommand +- SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv ] with output dataset builders [] 22/02/23 06:22:43 INFO DefaultJDBCWrapper$: Executing statement: SELECT @@VERSION OPTION (LABEL = 'Databricks Get DW @@version; Container Build 6e3700d') 22/02/23 06:22:43 INFO DefaultJDBCWrapper$: Executing statement: SELECT @@VERSION OPTION (LABEL = 'Databricks Get DW @@version; Container Build 6e3700d') 22/02/23 06:22:43 INFO SqlDwWriter: Executing preActions 22/02/23 06:22:43 INFO SqlDwWriter: Loading new data to: "exampleOutput" 22/02/23 06:22:43 INFO DefaultJDBCWrapper$: Executing statement: SELECT CONVERT(varchar(200), DATABASEPROPERTYEX('SQLPool1', 'ServiceObjective')) AS ServiceObjective 22/02/23 06:22:43 INFO SqlDwWriter: Using COPY as write semantics 22/02/23 06:22:43 INFO DefaultJDBCWrapper$: Executing statement: SELECT CONVERT(varchar(200), DATABASEPROPERTYEX('SQLPool1', 'ServiceObjective')) AS ServiceObjective 22/02/23 06:22:43 INFO SqlDwWriter: Database performance level is Gen2 22/02/23 06:22:43 INFO ClusterLoadAvgHelper: Current cluster load: 1, Old Ema: 0.0, New Ema: 1.0 22/02/23 06:22:44 INFO SqlDWRelation: Executing UNLOAD statement: SELECT "id", "postalCode", "streetAddress" FROM "exampleinputA" 22/02/23 06:22:44 INFO DefaultJDBCWrapper$: Executing statement: CREATE DATABASE SCOPED CREDENTIAL tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_storage_credential WITH IDENTITY = 'databricks_bridge', SECRET = '...' 22/02/23 06:22:44 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"START","eventTime":"2022-02-23T06:22:43.081Z","run":{"runId":"7382fea9-7a29-448e-8a1f-398241a17b76","facets":{"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"},"spark.logicalPlan":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","plan":[{"class":"org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand","num-children":0,"query":[{"class":"org.apache.spark.sql.catalyst.plans.logical.Project","num-children":1,"projectList":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"child":0},{"class":"org.apache.spark.sql.catalyst.plans.logical.Join","num-children":2,"left":0,"right":1,"joinType":{"object":"org.apache.spark.sql.catalyst.plans.Inner$"},"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.EqualTo","num-children":2,"left":0,"right":1},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"hint":null},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false}],"dataSource":null,"options":null,"mode":null}]}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_save_into_data_source_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleOutput","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]},"tableStateChange":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/DatasetFacet","stateChange":"overwrite"}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:22:44 INFO AsyncEventQueue: Process of event SparkListenerSQLExecutionStart(8,val outputDf = exampleA.join(exampleB, exampleA...,org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:312) $line2fe7f45f492340bf8688245de77d3ad433.$read$$iw$$iw$$iw$$iw$$iw$$iw.(command-898391230850386:10) $line2fe7f45f492340bf8688245de77d3ad433.$read$$iw$$iw$$iw$$iw$$iw.(command-898391230850386:61) $line2fe7f45f492340bf8688245de77d3ad433.$read$$iw$$iw$$iw$$iw.(command-898391230850386:63) $line2fe7f45f492340bf8688245de77d3ad433.$read$$iw$$iw$$iw.(command-898391230850386:65) $line2fe7f45f492340bf8688245de77d3ad433.$read$$iw$$iw.(command-898391230850386:67) $line2fe7f45f492340bf8688245de77d3ad433.$read$$iw.(command-898391230850386:69) $line2fe7f45f492340bf8688245de77d3ad433.$read.(command-898391230850386:71) $line2fe7f45f492340bf8688245de77d3ad433.$read$.(command-898391230850386:75) $line2fe7f45f492340bf8688245de77d3ad433.$read$.(command-898391230850386) $line2fe7f45f492340bf8688245de77d3ad433.$eval$.$print$lzycompute(:7) $line2fe7f45f492340bf8688245de77d3ad433.$eval$.$print(:6) $line2fe7f45f492340bf8688245de77d3ad433.$eval.$print() sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) java.lang.reflect.Method.invoke(Method.java:498) scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:745) scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1021) scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:574),== Physical Plan == Execute SaveIntoDataSourceCommand (1) +- SaveIntoDataSourceCommand (2) +- Project (6) +- Join (5) :- LogicalRelation (3) +- LogicalRelation (4) (1) Execute SaveIntoDataSourceCommand Output: [] (2) SaveIntoDataSourceCommand Arguments: com.databricks.spark.sqldw.DefaultSource@74d497d9, [url=*********(redacted), forwardsparkazurestoragecredentials=*********(redacted), dbtable=exampleOutput, tempdir=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder], Overwrite (3) LogicalRelation Arguments: SqlDWRelation("exampleinputA"), [id#51, postalCode#52, streetAddress#53], false (4) LogicalRelation Arguments: csv, [id#57, city#58, stateAbbreviation#59], false (5) Join Arguments: Inner, (id#51 = id#57) (6) Project Arguments: [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] ,org.apache.spark.sql.execution.SparkPlanInfo@e60025fe,1645597363081) by listener OpenLineageSparkListener took 1.06556704s. 22/02/23 06:22:44 INFO DefaultJDBCWrapper$: Executing statement: CREATE EXTERNAL DATA SOURCE tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_data_source WITH ( TYPE = HADOOP ,LOCATION = 'wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net' ,CREDENTIAL = tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_storage_credential ) 22/02/23 06:22:44 INFO DefaultJDBCWrapper$: Executing statement: CREATE EXTERNAL FILE FORMAT tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_file_format WITH ( FORMAT_TYPE = PARQUET , DATA_COMPRESSION = 'org.apache.hadoop.io.compress.SnappyCodec' ) 22/02/23 06:22:44 INFO DefaultJDBCWrapper$: Executing statement: CREATE EXTERNAL TABLE tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_external_table WITH ( LOCATION = '/tempfolder/2022-02-23/06-22-44-033/38d11743-f65b-4905-ae3f-3b076ee4bfd9/', DATA_SOURCE = tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_data_source, FILE_FORMAT = tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_file_format, REJECT_TYPE = VALUE, REJECT_VALUE = 0 ) AS SELECT "id", "postalCode", "streetAddress" FROM "exampleinputA" OPTION (LABEL = 'Databricks Unload; Container Build 6e3700d'); 22/02/23 06:22:46 INFO ClusterLoadAvgHelper: Current cluster load: 1, Old Ema: 1.0, New Ema: 1.0 22/02/23 06:22:49 INFO ClusterLoadAvgHelper: Current cluster load: 1, Old Ema: 1.0, New Ema: 1.0 22/02/23 06:22:51 INFO DefaultJDBCWrapper$: Executing statement: DROP EXTERNAL TABLE tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_external_table 22/02/23 06:22:51 INFO DefaultJDBCWrapper$: Executing statement: DROP EXTERNAL FILE FORMAT tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_file_format 22/02/23 06:22:51 INFO DefaultJDBCWrapper$: Executing statement: DROP EXTERNAL DATA SOURCE tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_data_source 22/02/23 06:22:52 INFO DefaultJDBCWrapper$: Executing statement: DROP DATABASE SCOPED CREDENTIAL tmp_databricks_2022_02_23_06_22_44_034_bb45b7df9c9e46b3bcfc505dddd61dda_storage_credential 22/02/23 06:22:53 INFO SparkContext: Starting job: load at SqlDWRelation.scala:262 22/02/23 06:22:53 INFO DAGScheduler: Got job 0 (load at SqlDWRelation.scala:262) with 1 output partitions 22/02/23 06:22:53 INFO DAGScheduler: Final stage: ResultStage 0 (load at SqlDWRelation.scala:262) 22/02/23 06:22:53 INFO DAGScheduler: Parents of final stage: List() 22/02/23 06:22:53 INFO DAGScheduler: Missing parents: List() 22/02/23 06:22:53 INFO OpenLineageSparkListener: WILLJ: In a JobStart 22/02/23 06:22:53 INFO OpenLineageSparkListener: WILLJ: jobid: 0 has executionid |9| 22/02/23 06:22:53 INFO OpenLineageSparkListener: WILLJ: We're deep inside this thing and have an executionid |9| 22/02/23 06:22:53 INFO OpenLineageSparkListener: Getting an RDD Execution Context 22/02/23 06:22:53 INFO ContextFactory: Creating a new execution context for executionid |9| 22/02/23 06:22:53 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[1] at load at SqlDWRelation.scala:262), which has no missing parents 22/02/23 06:22:53 INFO DAGScheduler: Jars for session None: Map(spark://10.139.64.4:38935/jars/addedFile2130668023264349103spark_mssql_connector_2_12_1_2_0-7e673.jar -> 1645597330046) 22/02/23 06:22:53 INFO DAGScheduler: Files for session None: Map(spark://10.139.64.4:38935/files/addedFile2130668023264349103spark_mssql_connector_2_12_1_2_0-7e673.jar -> 1645597330027) 22/02/23 06:22:53 INFO DAGScheduler: Archives for session None: Map() 22/02/23 06:22:53 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[1] at load at SqlDWRelation.scala:262) (first 15 tasks are for partitions Vector(0)) 22/02/23 06:22:53 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks resource profile 0 22/02/23 06:22:53 WARN FairSchedulableBuilder: A job was submitted with scheduler pool 5546023280055149122, which has not been configured. This can happen when the file that pools are read from isn't set, or when that file doesn't contain 5546023280055149122. Created 5546023280055149122 with default configuration (schedulingMode: FIFO, minShare: 0, weight: 1) 22/02/23 06:22:53 INFO FairSchedulableBuilder: Added task set TaskSet_0.0 tasks to pool 5546023280055149122 22/02/23 06:22:53 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0) (10.139.64.5, executor 0, partition 0, PROCESS_LOCAL, taskResourceAssignments Map()) 22/02/23 06:22:55 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 2326 ms on 10.139.64.5 (executor 0) (1/1) 22/02/23 06:22:55 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 5546023280055149122 22/02/23 06:22:55 INFO DAGScheduler: ResultStage 0 (load at SqlDWRelation.scala:262) finished in 2.451 s 22/02/23 06:22:55 INFO DAGScheduler: Job 0 is finished. Cancelling potential speculative or zombie tasks for this job 22/02/23 06:22:55 INFO TaskSchedulerImpl: Killing all running tasks in stage 0: Stage finished 22/02/23 06:22:55 INFO DAGScheduler: Job 0 finished: load at SqlDWRelation.scala:262, took 2.578630 s 22/02/23 06:22:55 INFO FileSourceStrategy: Output Data Schema: struct 22/02/23 06:22:55 INFO CodeGenerator: Code generated in 40.236747 ms 22/02/23 06:22:55 INFO ClusterLoadAvgHelper: Current cluster load: 1, Old Ema: 1.0, New Ema: 1.0 22/02/23 06:22:56 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 425.3 KiB, free 3.3 GiB) 22/02/23 06:22:56 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 31.2 KiB, free 3.3 GiB) 22/02/23 06:22:56 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on 10.139.64.4:43341 (size: 31.2 KiB, free: 3.3 GiB) 22/02/23 06:22:56 INFO SparkContext: Created broadcast 2 from execute at SqlDWRelation.scala:251 22/02/23 06:22:56 INFO AzureNativeFileSystemStore: URI scheme: wasbs, using https for connections 22/02/23 06:22:56 INFO NativeAzureFileSystem: Delete with limit configurations: deleteFileCountLimitEnabled=false, deleteFileCountLimit=-1 22/02/23 06:22:56 INFO FileSourceScanExec: Planning scan with bin packing, max size: 134217728 bytes, open cost is considered as scanning 4194304 bytes. 22/02/23 06:22:56 INFO FileSourceStrategy: Pushed Filters: IsNotNull(id) 22/02/23 06:22:56 INFO FileSourceStrategy: Post-Scan Filters: isnotnull(id#57) 22/02/23 06:22:56 INFO FileSourceStrategy: Output Data Schema: struct 22/02/23 06:22:56 INFO ContextFactory: WILLJ: Simple String == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct 22/02/23 06:22:56 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:22:56 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with output dataset builders [] 22/02/23 06:22:56 INFO InsertIntoHadoopFsRelationVisitor: Matched io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor to logical plan InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:22:56 INFO CodeGenerator: Code generated in 34.93724 ms 22/02/23 06:22:56 INFO SparkContext: Starting job: $anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604 22/02/23 06:22:56 INFO DAGScheduler: Got job 1 ($anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604) with 1 output partitions 22/02/23 06:22:56 INFO DAGScheduler: Final stage: ResultStage 1 ($anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604) 22/02/23 06:22:56 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[7] at $anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604), which has no missing parents 22/02/23 06:22:56 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[7] at $anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604) (first 15 tasks are for partitions Vector(0)) 22/02/23 06:22:56 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks resource profile 0 22/02/23 06:22:56 INFO FairSchedulableBuilder: Added task set TaskSet_1.0 tasks to pool 5546023280055149122 22/02/23 06:22:56 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1) (10.139.64.5, executor 0, partition 0, PROCESS_LOCAL, taskResourceAssignments Map()) 22/02/23 06:22:57 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"START","eventTime":"2022-02-23T06:22:53.264Z","run":{"runId":"9d89b958-b03f-46c4-ae03-5edb3066bdde","facets":{"environment-properties":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","environment-properties":{"spark.databricks.clusterUsageTags.clusterName":"wj-purview-test","spark.databricks.clusterUsageTags.azureSubscriptionId":"8cef90cc-c11a-43f5-a332-16d403db7513","spark.databricks.notebook.path":"/Shared/examples/synapse-wasbs-in-synapse-out","mountPoints":[{"mountPoint":"/databricks-datasets","source":"databricks-datasets"},{"mountPoint":"/mnt/rawdata","source":"abfss://rawdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks/mlflow-tracking","source":"databricks/mlflow-tracking"},{"mountPoint":"/mnt/outputdata","source":"abfss://outputdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks-results","source":"databricks-results"},{"mountPoint":"/databricks/mlflow-registry","source":"databricks/mlflow-registry"},{"mountPoint":"/mnt/delta","source":"abfss://deltalake@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/","source":"DatabricksRoot"}],"spark.databricks.clusterUsageTags.clusterOwnerOrgId":"4630430682081461","user":"wijohns@microsoft.com","userId":"897824647105011","orgId":"4630430682081461"}},"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_insert_into_hadoop_fs_relation_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","name":"/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","uri":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:22:57 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=122.0 B) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:22:57 INFO AzureNativeFileSystemStore: URI scheme: wasbs, using https for connections 22/02/23 06:22:57 INFO NativeAzureFileSystem: Delete with limit configurations: deleteFileCountLimitEnabled=false, deleteFileCountLimit=-1 22/02/23 06:22:57 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=122.0 B) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with output dataset builders [] 22/02/23 06:22:57 INFO InsertIntoHadoopFsRelationVisitor: Matched io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor to logical plan InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:22:57 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"COMPLETE","eventTime":"2022-02-23T06:22:55.809Z","run":{"runId":"9d89b958-b03f-46c4-ae03-5edb3066bdde","facets":{"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"},"spark.logicalPlan":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","plan":[{"class":"org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand","num-children":1,"outputPath":null,"staticPartitions":null,"ifPartitionNotExists":false,"partitionColumns":[],"fileFormat":null,"options":null,"query":0,"mode":null,"outputColumnNames":"[id, postalCode, streetAddress, city, stateAbbreviation]"},{"class":"org.apache.spark.sql.catalyst.plans.logical.Project","num-children":1,"projectList":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"child":0},{"class":"org.apache.spark.sql.catalyst.plans.logical.Join","num-children":2,"left":0,"right":1,"joinType":{"object":"org.apache.spark.sql.catalyst.plans.Inner$"},"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.EqualTo","num-children":2,"left":0,"right":1},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"hint":null},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false},{"class":"org.apache.spark.sql.catalyst.plans.logical.Filter","num-children":1,"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.IsNotNull","num-children":1,"child":0},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"child":0},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false}]}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_insert_into_hadoop_fs_relation_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","name":"/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","uri":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"outputFacets":{"outputStatistics":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/OutputStatisticsOutputDatasetFacet.json#/$defs/OutputStatisticsOutputDatasetFacet","rowCount":0,"size":0}}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:22:57 INFO OpenLineageSparkListener: WILLJ: Starting up a SparkSQLExecStart with executionid |9| 22/02/23 06:22:57 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=122.0 B) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:22:57 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=122.0 B) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with output dataset builders [] 22/02/23 06:22:57 INFO InsertIntoHadoopFsRelationVisitor: Matched io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor to logical plan InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:22:57 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"START","eventTime":"2022-02-23T06:22:56.257Z","run":{"runId":"9d89b958-b03f-46c4-ae03-5edb3066bdde","facets":{"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"},"spark.logicalPlan":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","plan":[{"class":"org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand","num-children":1,"outputPath":null,"staticPartitions":null,"ifPartitionNotExists":false,"partitionColumns":[],"fileFormat":null,"options":null,"query":0,"mode":null,"outputColumnNames":"[id, postalCode, streetAddress, city, stateAbbreviation]"},{"class":"org.apache.spark.sql.catalyst.plans.logical.Project","num-children":1,"projectList":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"child":0},{"class":"org.apache.spark.sql.catalyst.plans.logical.Join","num-children":2,"left":0,"right":1,"joinType":{"object":"org.apache.spark.sql.catalyst.plans.Inner$"},"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.EqualTo","num-children":2,"left":0,"right":1},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"hint":null},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false},{"class":"org.apache.spark.sql.catalyst.plans.logical.Filter","num-children":1,"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.IsNotNull","num-children":1,"child":0},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"child":0},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false}]}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_insert_into_hadoop_fs_relation_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","name":"/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","uri":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:22:57 INFO OpenLineageSparkListener: WILLJ: In a JobStart 22/02/23 06:22:57 INFO OpenLineageSparkListener: WILLJ: jobid: 1 has executionid |9| 22/02/23 06:22:57 INFO OpenLineageSparkListener: WILLJ: We're deep inside this thing and have an executionid |9| 22/02/23 06:22:57 INFO OpenLineageSparkListener: Getting an RDD Execution Context 22/02/23 06:22:57 INFO OpenLineageRunEventBuilder: WILLJ: REGISTERING JOB 1 22/02/23 06:22:57 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=122.0 B) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:22:57 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=122.0 B) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with output dataset builders [] 22/02/23 06:22:57 INFO InsertIntoHadoopFsRelationVisitor: Matched io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor to logical plan InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:22:57 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"START","eventTime":"2022-02-23T06:22:56.632Z","run":{"runId":"9d89b958-b03f-46c4-ae03-5edb3066bdde","facets":{"environment-properties":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","environment-properties":{"spark.databricks.clusterUsageTags.clusterName":"wj-purview-test","spark.databricks.clusterUsageTags.azureSubscriptionId":"8cef90cc-c11a-43f5-a332-16d403db7513","spark.databricks.notebook.path":"/Shared/examples/synapse-wasbs-in-synapse-out","mountPoints":[{"mountPoint":"/databricks-datasets","source":"databricks-datasets"},{"mountPoint":"/mnt/rawdata","source":"abfss://rawdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks/mlflow-tracking","source":"databricks/mlflow-tracking"},{"mountPoint":"/mnt/outputdata","source":"abfss://outputdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks-results","source":"databricks-results"},{"mountPoint":"/databricks/mlflow-registry","source":"databricks/mlflow-registry"},{"mountPoint":"/mnt/delta","source":"abfss://deltalake@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/","source":"DatabricksRoot"}],"spark.databricks.clusterUsageTags.clusterOwnerOrgId":"4630430682081461","user":"wijohns@microsoft.com","userId":"897824647105011","orgId":"4630430682081461"}},"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_insert_into_hadoop_fs_relation_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","name":"/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","uri":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:22:58 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 1600 ms on 10.139.64.5 (executor 0) (1/1) 22/02/23 06:22:58 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool 5546023280055149122 22/02/23 06:22:58 INFO DAGScheduler: ResultStage 1 ($anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604) finished in 1.689 s 22/02/23 06:22:58 INFO DAGScheduler: Job 1 is finished. Cancelling potential speculative or zombie tasks for this job 22/02/23 06:22:58 INFO TaskSchedulerImpl: Killing all running tasks in stage 1: Stage finished 22/02/23 06:22:58 INFO DAGScheduler: Job 1 finished: $anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604, took 1.701011 s 22/02/23 06:22:58 INFO CodeGenerator: Code generated in 11.240013 ms 22/02/23 06:22:58 INFO SparkContext: Created broadcast 5 from $anonfun$withThreadLocalCaptured$1 at CompletableFuture.java:1604 22/02/23 06:22:58 INFO SqlDWRelation: Executing UNLOAD statement: SELECT "id", "postalCode", "streetAddress" FROM "exampleinputA" 22/02/23 06:22:58 INFO DefaultJDBCWrapper$: Executing statement: CREATE DATABASE SCOPED CREDENTIAL tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_storage_credential WITH IDENTITY = 'databricks_bridge', SECRET = '...' 22/02/23 06:22:58 INFO DefaultJDBCWrapper$: Executing statement: CREATE EXTERNAL DATA SOURCE tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_data_source WITH ( TYPE = HADOOP ,LOCATION = 'wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net' ,CREDENTIAL = tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_storage_credential ) 22/02/23 06:22:58 INFO DefaultJDBCWrapper$: Executing statement: CREATE EXTERNAL FILE FORMAT tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_file_format WITH ( FORMAT_TYPE = PARQUET , DATA_COMPRESSION = 'org.apache.hadoop.io.compress.SnappyCodec' ) 22/02/23 06:22:58 INFO DefaultJDBCWrapper$: Executing statement: CREATE EXTERNAL TABLE tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_external_table WITH ( LOCATION = '/tempfolder/2022-02-23/06-22-58-458/08af09e4-223b-44c4-b22a-5ffe4d0f6fea/', DATA_SOURCE = tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_data_source, FILE_FORMAT = tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_file_format, REJECT_TYPE = VALUE, REJECT_VALUE = 0 ) AS SELECT "id", "postalCode", "streetAddress" FROM "exampleinputA" OPTION (LABEL = 'Databricks Unload; Container Build 6e3700d'); 22/02/23 06:22:58 INFO ClusterLoadAvgHelper: Current cluster load: 1, Old Ema: 1.0, New Ema: 1.0 22/02/23 06:23:00 INFO DefaultJDBCWrapper$: Executing statement: DROP EXTERNAL TABLE tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_external_table 22/02/23 06:23:00 INFO DefaultJDBCWrapper$: Executing statement: DROP EXTERNAL FILE FORMAT tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_file_format 22/02/23 06:23:00 INFO DefaultJDBCWrapper$: Executing statement: DROP EXTERNAL DATA SOURCE tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_data_source 22/02/23 06:23:01 INFO DefaultJDBCWrapper$: Executing statement: DROP DATABASE SCOPED CREDENTIAL tmp_databricks_2022_02_23_06_22_58_458_e265d968e0354b1fb1c5af481c09da94_storage_credential 22/02/23 06:23:01 INFO SparkContext: Starting job: load at SqlDWRelation.scala:262 22/02/23 06:23:01 INFO DAGScheduler: Got job 2 (load at SqlDWRelation.scala:262) with 1 output partitions 22/02/23 06:23:01 INFO DAGScheduler: Final stage: ResultStage 2 (load at SqlDWRelation.scala:262) 22/02/23 06:23:01 INFO DAGScheduler: Parents of final stage: List() 22/02/23 06:23:01 INFO DAGScheduler: Missing parents: List() 22/02/23 06:23:01 INFO OpenLineageSparkListener: WILLJ: In a JobStart 22/02/23 06:23:01 INFO DAGScheduler: Submitting ResultStage 2 (MapPartitionsRDD[9] at load at SqlDWRelation.scala:262), which has no missing parents 22/02/23 06:23:01 INFO OpenLineageSparkListener: WILLJ: jobid: 2 has executionid |9| 22/02/23 06:23:01 INFO OpenLineageSparkListener: WILLJ: We're deep inside this thing and have an executionid |9| 22/02/23 06:23:01 INFO OpenLineageSparkListener: Getting an RDD Execution Context 22/02/23 06:23:01 INFO DAGScheduler: Jars for session None: Map(spark://10.139.64.4:38935/jars/addedFile2130668023264349103spark_mssql_connector_2_12_1_2_0-7e673.jar -> 1645597330046) 22/02/23 06:23:01 INFO DAGScheduler: Files for session None: Map(spark://10.139.64.4:38935/files/addedFile2130668023264349103spark_mssql_connector_2_12_1_2_0-7e673.jar -> 1645597330027) 22/02/23 06:23:01 INFO DAGScheduler: Archives for session None: Map() 22/02/23 06:23:01 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 2 (MapPartitionsRDD[9] at load at SqlDWRelation.scala:262) (first 15 tasks are for partitions Vector(0)) 22/02/23 06:23:01 INFO TaskSchedulerImpl: Adding task set 2.0 with 1 tasks resource profile 0 22/02/23 06:23:01 WARN FairSchedulableBuilder: A job was submitted with scheduler pool 5546023280055149122, which has not been configured. This can happen when the file that pools are read from isn't set, or when that file doesn't contain 5546023280055149122. Created 5546023280055149122 with default configuration (schedulingMode: FIFO, minShare: 0, weight: 1) 22/02/23 06:23:01 INFO FairSchedulableBuilder: Added task set TaskSet_2.0 tasks to pool 5546023280055149122 22/02/23 06:23:01 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2) (10.139.64.5, executor 0, partition 0, PROCESS_LOCAL, taskResourceAssignments Map()) 22/02/23 06:23:01 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=1024.0 KiB, rowCount=5, isRuntime=true) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:23:01 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=1024.0 KiB, rowCount=5, isRuntime=true) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with output dataset builders [] 22/02/23 06:23:01 INFO InsertIntoHadoopFsRelationVisitor: Matched io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor to logical plan InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:23:01 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 180 ms on 10.139.64.5 (executor 0) (1/1) 22/02/23 06:23:01 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool 5546023280055149122 22/02/23 06:23:01 INFO DAGScheduler: ResultStage 2 (load at SqlDWRelation.scala:262) finished in 0.186 s 22/02/23 06:23:01 INFO DAGScheduler: Job 2 is finished. Cancelling potential speculative or zombie tasks for this job 22/02/23 06:23:01 INFO TaskSchedulerImpl: Killing all running tasks in stage 2: Stage finished 22/02/23 06:23:01 INFO DAGScheduler: Job 2 finished: load at SqlDWRelation.scala:262, took 0.190689 s 22/02/23 06:23:01 INFO FileSourceStrategy: Pushed Filters: 22/02/23 06:23:01 INFO FileSourceStrategy: Post-Scan Filters: 22/02/23 06:23:01 INFO FileSourceStrategy: Output Data Schema: struct 22/02/23 06:23:01 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"START","eventTime":"2022-02-23T06:23:01.503Z","run":{"runId":"9d89b958-b03f-46c4-ae03-5edb3066bdde","facets":{"environment-properties":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","environment-properties":{"spark.databricks.clusterUsageTags.clusterName":"wj-purview-test","spark.databricks.clusterUsageTags.azureSubscriptionId":"8cef90cc-c11a-43f5-a332-16d403db7513","spark.databricks.notebook.path":"/Shared/examples/synapse-wasbs-in-synapse-out","mountPoints":[{"mountPoint":"/databricks-datasets","source":"databricks-datasets"},{"mountPoint":"/mnt/rawdata","source":"abfss://rawdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks/mlflow-tracking","source":"databricks/mlflow-tracking"},{"mountPoint":"/mnt/outputdata","source":"abfss://outputdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks-results","source":"databricks-results"},{"mountPoint":"/databricks/mlflow-registry","source":"databricks/mlflow-registry"},{"mountPoint":"/mnt/delta","source":"abfss://deltalake@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/","source":"DatabricksRoot"}],"spark.databricks.clusterUsageTags.clusterOwnerOrgId":"4630430682081461","user":"wijohns@microsoft.com","userId":"897824647105011","orgId":"4630430682081461"}},"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_insert_into_hadoop_fs_relation_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","name":"/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","uri":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:23:01 INFO CodeGenerator: Code generated in 41.301645 ms 22/02/23 06:23:01 INFO SparkContext: Starting job: save at SqlDwWriter.scala:152 22/02/23 06:23:01 INFO DAGScheduler: Got job 3 (save at SqlDwWriter.scala:152) with 1 output partitions 22/02/23 06:23:01 INFO DAGScheduler: Final stage: ResultStage 3 (save at SqlDwWriter.scala:152) 22/02/23 06:23:01 INFO DAGScheduler: Parents of final stage: List() 22/02/23 06:23:01 INFO DAGScheduler: Missing parents: List() 22/02/23 06:23:01 INFO OpenLineageSparkListener: WILLJ: In a JobStart 22/02/23 06:23:01 INFO DAGScheduler: Submitting ResultStage 3 (MapPartitionsRDD[13] at save at SqlDwWriter.scala:152), which has no missing parents 22/02/23 06:23:01 INFO OpenLineageSparkListener: WILLJ: jobid: 3 has executionid |9| 22/02/23 06:23:01 INFO OpenLineageSparkListener: WILLJ: We're deep inside this thing and have an executionid |9| 22/02/23 06:23:01 INFO OpenLineageSparkListener: Getting an RDD Execution Context 22/02/23 06:23:01 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=true +- == Final Plan == *(2) Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- *(2) BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- *(2) Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=1024.0 KiB, rowCount=5, isRuntime=true) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:23:01 INFO AzureNativeFileSystemStore: URI scheme: wasbs, using https for connections 22/02/23 06:23:01 INFO NativeAzureFileSystem: Delete with limit configurations: deleteFileCountLimitEnabled=false, deleteFileCountLimit=-1 22/02/23 06:23:01 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=true +- == Final Plan == *(2) Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- *(2) BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- *(2) Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=1024.0 KiB, rowCount=5, isRuntime=true) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with output dataset builders [] 22/02/23 06:23:01 INFO InsertIntoHadoopFsRelationVisitor: Matched io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor to logical plan InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:23:01 INFO DAGScheduler: Jars for session None: Map(spark://10.139.64.4:38935/jars/addedFile2130668023264349103spark_mssql_connector_2_12_1_2_0-7e673.jar -> 1645597330046) 22/02/23 06:23:01 INFO DAGScheduler: Files for session None: Map(spark://10.139.64.4:38935/files/addedFile2130668023264349103spark_mssql_connector_2_12_1_2_0-7e673.jar -> 1645597330027) 22/02/23 06:23:01 INFO DAGScheduler: Archives for session None: Map() 22/02/23 06:23:01 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 3 (MapPartitionsRDD[13] at save at SqlDwWriter.scala:152) (first 15 tasks are for partitions Vector(0)) 22/02/23 06:23:01 INFO TaskSchedulerImpl: Adding task set 3.0 with 1 tasks resource profile 0 22/02/23 06:23:01 INFO FairSchedulableBuilder: Added task set TaskSet_3.0 tasks to pool 5546023280055149122 22/02/23 06:23:01 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 3) (10.139.64.5, executor 0, partition 0, PROCESS_LOCAL, taskResourceAssignments Map()) 22/02/23 06:23:01 INFO MemoryStore: Block broadcast_8 stored as values in memory (estimated size 592.8 KiB, free 3.3 GiB) 22/02/23 06:23:01 INFO MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 107.7 KiB, free 3.3 GiB) 22/02/23 06:23:01 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on 10.139.64.4:43341 (size: 107.7 KiB, free: 3.3 GiB) 22/02/23 06:23:01 INFO SparkContext: Created broadcast 8 from broadcast at TaskSetManager.scala:560 22/02/23 06:23:01 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on 10.139.64.5:45161 (size: 107.7 KiB, free: 3.6 GiB) 22/02/23 06:23:02 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"START","eventTime":"2022-02-23T06:23:01.868Z","run":{"runId":"9d89b958-b03f-46c4-ae03-5edb3066bdde","facets":{"environment-properties":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","environment-properties":{"spark.databricks.clusterUsageTags.clusterName":"wj-purview-test","spark.databricks.clusterUsageTags.azureSubscriptionId":"8cef90cc-c11a-43f5-a332-16d403db7513","spark.databricks.notebook.path":"/Shared/examples/synapse-wasbs-in-synapse-out","mountPoints":[{"mountPoint":"/databricks-datasets","source":"databricks-datasets"},{"mountPoint":"/mnt/rawdata","source":"abfss://rawdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks/mlflow-tracking","source":"databricks/mlflow-tracking"},{"mountPoint":"/mnt/outputdata","source":"abfss://outputdata@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/databricks-results","source":"databricks-results"},{"mountPoint":"/databricks/mlflow-registry","source":"databricks/mlflow-registry"},{"mountPoint":"/mnt/delta","source":"abfss://deltalake@MYOTHERSERVICE.dfs.core.windows.net/"},{"mountPoint":"/","source":"DatabricksRoot"}],"spark.databricks.clusterUsageTags.clusterOwnerOrgId":"4630430682081461","user":"wijohns@microsoft.com","userId":"897824647105011","orgId":"4630430682081461"}},"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_insert_into_hadoop_fs_relation_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","name":"/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","uri":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:23:02 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on 10.139.64.5:45161 (size: 31.2 KiB, free: 3.6 GiB) 22/02/23 06:23:02 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on 10.139.64.5:45161 (size: 360.0 B, free: 3.6 GiB) 22/02/23 06:23:04 INFO ClusterLoadAvgHelper: Current cluster load: 1, Old Ema: 1.0, New Ema: 1.0 22/02/23 06:23:05 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 3) in 3991 ms on 10.139.64.5 (executor 0) (1/1) 22/02/23 06:23:05 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool 5546023280055149122 22/02/23 06:23:05 INFO DAGScheduler: ResultStage 3 (save at SqlDwWriter.scala:152) finished in 4.047 s 22/02/23 06:23:05 INFO DAGScheduler: Job 3 is finished. Cancelling potential speculative or zombie tasks for this job 22/02/23 06:23:05 INFO TaskSchedulerImpl: Killing all running tasks in stage 3: Stage finished 22/02/23 06:23:05 INFO DAGScheduler: Job 3 finished: save at SqlDwWriter.scala:152, took 4.055939 s 22/02/23 06:23:05 INFO DirectoryAtomicCommitProtocol: Committing job de6c1b4f-7b98-4d93-9ffe-1c2ae3dc318a 22/02/23 06:23:05 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_committed_5989739686687906092] Creating output stream; permission: rw-r--r--, overwrite: true, bufferSize: 65536 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_committed_5989739686687906092] Closing stream; size: 122 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_committed_5989739686687906092] Upload complete; size: 122 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_SUCCESS] Creating output stream; permission: rw-r--r--, overwrite: true, bufferSize: 65536 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_SUCCESS] Closing stream; size: 0 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_SUCCESS] Upload complete; size: 0 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_SUCCESS] Creating output stream; permission: rw-r--r--, overwrite: true, bufferSize: 65536 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_SUCCESS] Closing stream; size: 0 22/02/23 06:23:06 INFO NativeAzureFileSystem: FS_OP_CREATE FILE[tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/_SUCCESS] Upload complete; size: 0 22/02/23 06:23:06 INFO DirectoryAtomicCommitProtocol: Job commit completed for de6c1b4f-7b98-4d93-9ffe-1c2ae3dc318a 22/02/23 06:23:06 INFO FileFormatWriter: Write Job 454abc1e-1e01-4b8d-a84f-f3de3e813b4f committed. 22/02/23 06:23:06 INFO FileFormatWriter: Finished processing stats for write job 454abc1e-1e01-4b8d-a84f-f3de3e813b4f. 22/02/23 06:23:06 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=true +- == Final Plan == *(2) Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- *(2) BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- *(2) Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=1024.0 KiB, rowCount=5, isRuntime=true) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:23:06 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51 AS id#95, postalCode#52 AS postalCode#96, streetAddress#53 AS streetAddress#97, city#58 AS city#98, stateAbbreviation#59 AS stateAbbreviation#99] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- AdaptiveSparkPlan isFinalPlan=true +- == Final Plan == *(2) Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- *(2) BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- *(2) Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastQueryStage 0, Statistics(sizeInBytes=1024.0 KiB, rowCount=5, isRuntime=true) +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +- *(1) Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct +- == Initial Plan == Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- BroadcastHashJoin [id#51], [id#57], Inner, BuildRight, false :- Scan SqlDWRelation("exampleinputA") [id#51,postalCode#52,streetAddress#53] PushedFilters: [], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +- Filter isnotnull(id#57) +- FileScan csv [id#57,city#58,stateAbbreviation#59] Batched: false, DataFilters: [isnotnull(id#57)], Format: CSV, Location: InMemoryFileIndex[wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net/examples/data/csv/exampl..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct ] with output dataset builders [] 22/02/23 06:23:06 INFO InsertIntoHadoopFsRelationVisitor: Matched io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor to logical plan InsertIntoHadoopFsRelationCommand wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93, false, Parquet, [compression=snappy, path=wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/], ErrorIfExists, [id, postalCode, streetAddress, city, stateAbbreviation] +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Filter isnotnull(id#57) +- Relation[id#57,city#58,stateAbbreviation#59] csv 22/02/23 06:23:06 INFO SqlDwWriter: Dropping existing table, if any: "exampleOutput" 22/02/23 06:23:06 INFO DefaultJDBCWrapper$: Executing statement: IF OBJECT_ID('"exampleOutput"') IS NOT NULL BEGIN DROP TABLE "exampleOutput" END 22/02/23 06:23:06 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"COMPLETE","eventTime":"2022-02-23T06:23:06.604Z","run":{"runId":"9d89b958-b03f-46c4-ae03-5edb3066bdde","facets":{"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"},"spark.logicalPlan":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","plan":[{"class":"org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand","num-children":1,"outputPath":null,"staticPartitions":null,"ifPartitionNotExists":false,"partitionColumns":[],"fileFormat":null,"options":null,"query":0,"mode":null,"outputColumnNames":"[id, postalCode, streetAddress, city, stateAbbreviation]"},{"class":"org.apache.spark.sql.catalyst.plans.logical.Project","num-children":1,"projectList":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"child":0},{"class":"org.apache.spark.sql.catalyst.plans.logical.Join","num-children":2,"left":0,"right":1,"joinType":{"object":"org.apache.spark.sql.catalyst.plans.Inner$"},"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.EqualTo","num-children":2,"left":0,"right":1},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"hint":null},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false},{"class":"org.apache.spark.sql.catalyst.plans.logical.Filter","num-children":1,"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.IsNotNull","num-children":1,"child":0},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"child":0},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false}]}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_insert_into_hadoop_fs_relation_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","name":"/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net","uri":"wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"} 22/02/23 06:23:07 INFO DefaultJDBCWrapper$: Executing statement: SELECT co.name FROM sys.columns AS co LEFT JOIN sys.identity_columns AS ic ON co.object_id = ic.object_id AND co.column_id = ic.column_id WHERE co.object_id = OBJECT_ID('"exampleOutput"') AND ic.column_id IS NULL ORDER BY co.column_id 22/02/23 06:23:07 INFO DefaultJDBCWrapper$: Executing statement: IF OBJECT_ID('"exampleOutput"') IS NOT NULL BEGIN COPY INTO "exampleOutput" ("id","postalCode","streetAddress","city","stateAbbreviation") FROM 'https://MYADLSSERVICE.blob.core.windows.net/MYCONTAINER/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/' WITH ( FILE_TYPE = 'PARQUET' ,CREDENTIAL = (IDENTITY = 'Storage Account Key', SECRET = '...') ,COMPRESSION = 'Snappy', IDENTITY_INSERT = 'OFF' ) OPTION (LABEL = 'Databricks Batch Load; Container Build 6e3700d: "exampleOutput"'); END ELSE BEGIN CREATE TABLE "exampleOutput" ( "id" INT NOT NULL, "postalCode" NVARCHAR(256), "streetAddress" NVARCHAR(256), "city" NVARCHAR(256), "stateAbbreviation" NVARCHAR(256) ) WITH (CLUSTERED COLUMNSTORE INDEX, DISTRIBUTION = ROUND_ROBIN); COPY INTO "exampleOutput" ("id","postalCode","streetAddress","city","stateAbbreviation") FROM 'https://MYADLSSERVICE.blob.core.windows.net/MYCONTAINER/tempfolder/2022-02-23/06-22-43-541/0235292d-842e-4546-a49c-814537f4ce93/' WITH ( FILE_TYPE = 'PARQUET' ,CREDENTIAL = (IDENTITY = 'Storage Account Key', SECRET = '...') ,COMPRESSION = 'Snappy', IDENTITY_INSERT = 'OFF' ) OPTION (LABEL = 'Databricks Batch Load; Container Build 6e3700d: "exampleOutput"'); END 22/02/23 06:23:10 INFO SqlDwWriter: Executing postActions 22/02/23 06:23:10 INFO DefaultJDBCWrapper$: Executing statement: SELECT @@VERSION OPTION (LABEL = 'Databricks Get DW @@version; Container Build 6e3700d') 22/02/23 06:23:10 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute SaveIntoDataSourceCommand +- SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv ] with input dataset builders [io.openlineage.spark.agent.lifecycle.plan.LogicalRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.LogicalRDDVisitor, io.openlineage.spark.agent.lifecycle.plan.BigQueryNodeVisitor, io.openlineage.spark.agent.lifecycle.plan.KafkaRelationVisitor, io.openlineage.spark.agent.lifecycle.plan.SqlDWDatabricksVisitor, io.openlineage.spark.agent.lifecycle.plan.CommandPlanVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2RelationVisitor, io.openlineage.spark3.agent.lifecycle.plan.DataSourceV2ScanRelationVisitor] 22/02/23 06:23:10 INFO OpenLineageRunEventBuilder: Visiting query plan Optional[== Parsed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Analyzed Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Optimized Logical Plan == SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv == Physical Plan == Execute SaveIntoDataSourceCommand +- SaveIntoDataSourceCommand com.databricks.spark.sqldw.DefaultSource@74d497d9, Map(url -> *********(redacted), forwardsparkazurestoragecredentials -> *********(redacted), dbtable -> exampleOutput, tempdir -> wasbs://MYCONTAINER@MYADLSSERVICE.blob.core.windows.net/tempfolder), Overwrite +- Project [id#51, postalCode#52, streetAddress#53, city#58, stateAbbreviation#59] +- Join Inner, (id#51 = id#57) :- Relation[id#51,postalCode#52,streetAddress#53] SqlDWRelation("exampleinputA") +- Relation[id#57,city#58,stateAbbreviation#59] csv ] with output dataset builders [] 22/02/23 06:23:11 INFO DefaultJDBCWrapper$: Executing statement: SELECT @@VERSION OPTION (LABEL = 'Databricks Get DW @@version; Container Build 6e3700d') 22/02/23 06:23:11 INFO EventEmitter: Lineage completed successfully: ResponseMessage(responseCode=200, body="GOOD") {"eventType":"COMPLETE","eventTime":"2022-02-23T06:23:10.924Z","run":{"runId":"7382fea9-7a29-448e-8a1f-398241a17b76","facets":{"spark_version":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","spark-version":"3.1.2","openlineage-spark-version":"0.6.0-SNAPSHOT"},"spark.logicalPlan":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunFacet","plan":[{"class":"org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand","num-children":0,"query":[{"class":"org.apache.spark.sql.catalyst.plans.logical.Project","num-children":1,"projectList":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"child":0},{"class":"org.apache.spark.sql.catalyst.plans.logical.Join","num-children":2,"left":0,"right":1,"joinType":{"object":"org.apache.spark.sql.catalyst.plans.Inner$"},"condition":[{"class":"org.apache.spark.sql.catalyst.expressions.EqualTo","num-children":2,"left":0,"right":1},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]},{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],"hint":null},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":false,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":51,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"postalCode","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":52,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"streetAddress","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":53,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false},{"class":"org.apache.spark.sql.execution.datasources.LogicalRelation","num-children":0,"relation":null,"output":[[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"id","dataType":"integer","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":57,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"city","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":58,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}],[{"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference","num-children":0,"name":"stateAbbreviation","dataType":"string","nullable":true,"metadata":{},"exprId":{"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId","id":59,"jvmId":"58d180c9-e902-47c9-b95e-1c8d6991ffdc"},"qualifier":[]}]],"isStreaming":false}],"dataSource":null,"options":null,"mode":null}]}}},"job":{"namespace":"adbpurviewol1","name":"databricks_shell.execute_save_into_data_source_command","facets":{}},"inputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleinputA","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"}]}},"inputFacets":{}},{"namespace":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","name":"/examples/data/csv/exampleInputB","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net","uri":"wasbs://rawdata@MYOTHERSERVICE.blob.core.windows.net"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]}},"inputFacets":{}}],"outputs":[{"namespace":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","name":"exampleOutput","facets":{"dataSource":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/DatasourceDatasetFacet.json#/$defs/DatasourceDatasetFacet","name":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;","uri":"sqlserver://MYDBSERVER.sql.azuresynapse.net:1433;database=SQLPool1;"},"schema":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json#/$defs/SchemaDatasetFacet","fields":[{"name":"id","type":"integer"},{"name":"postalCode","type":"string"},{"name":"streetAddress","type":"string"},{"name":"city","type":"string"},{"name":"stateAbbreviation","type":"string"}]},"tableStateChange":{"_producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","_schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/DatasetFacet","stateChange":"overwrite"}},"outputFacets":{}}],"producer":"https://github.com/OpenLineage/OpenLineage/tree/0.6.0-SNAPSHOT/integration/spark","schemaURL":"https://openlineage.io/spec/1-0-2/OpenLineage.json#/$defs/RunEvent"}