In [None]:
import warnings
# More specific pattern matching
warnings.filterwarnings("ignore", message=".*Could not infer format.*dateutil.*")
# Also try suppressing all UserWarnings from the specific file
warnings.filterwarnings("ignore", category=UserWarning, module="sparkmagic.*")


***

### Prerequisites
<div class="alert alert-block alert-info">
<b>NOTE :</b> In order to execute this notebook successfully as is, please ensure the following prerequisites are completed.</div>

* EMR Serverless should be chosen as the Compute. The Application release version should be 7.8.0 or higher.
* Make sure the Studio user role has permission to attach the Workspace to the Application and to pass the runtime role to it.
* This notebook uses the `PySpark` kernel.
***

### 1. Configure your Spark session in your Application.
* EMR Serverless Application -> Advanced Configuration -> Add the following configuration
* Replace `<workspace_url>` with your Databricks workspace URL. Example: `demo.cloud.databricks.com`
* Replace `<your default unity catalog>` with the Unity Catalog catalog you would like to use. Please make sure this catalog is accessible by external systems.
    
<div class="alert alert-block alert-info">
{
  "runtimeConfiguration": [
    {
      "classification": "spark-defaults",
      "properties": {
        "spark.jars.packages": "org.apache.iceberg:iceberg-spark-runtime-3.4_2.12:1.9.1,io.unitycatalog:unitycatalog-spark_2.12:0.2.0",
        "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
        "spark.hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
        "spark.sql.catalog.<your default unity catalog>": "org.apache.iceberg.spark.SparkCatalog",
        "spark.sql.catalog.<your default unity catalog>.uri": "https://<workspace_url>/api/2.1/unity-catalog/iceberg-rest",
        "spark.sql.defaultCatalog": "<your default unity catalog>",
        "spark.sql.catalog.<your default unity catalog>.type": "rest",
        "spark.sql.catalog.<your default unity catalog>.warehouse": "<your default unity catalog>"
      }
    }
  ]
}
</div>

### 2. Ensure Databricks Unity Catalog is accessible by the External System

Please make sure that "External data access" is enabled on the metastore of your catalog.

### 3. Access to Unity Catalog storage data 
Please make sure the EMR Serverless runtime (execution) IAM role has access to the storage bucket, and to the objects in it, that hold the Unity Catalog's data.

### 4. Add Databricks Personal Access Token to the Spark Session
* Create Personal Access Token in Databricks Console or via API
* For the purposes of this blog post, the token is used directly in the notebook.

<div class="alert alert-block alert-info">
<b>NOTE :</b> For production deployments, the recommended practice is to store the token in AWS Secrets Manager and retrieve it from there when configuring the Spark session in the notebook.</div>

In [None]:
from pyspark.sql import SparkSession

# Obtain the Spark session for this notebook and attach the Databricks
# personal access token to the catalog configuration.
# "<token>" is a placeholder and must be replaced before running.
# NOTE(review): "demo_catalog" presumably has to match the catalog name used
# in the application's spark-defaults configuration — confirm.
spark = (
    SparkSession.builder
    .appName("EMR Serverless and Databricks Unity Catalog Demo")
    .config("spark.sql.catalog.demo_catalog.token", "<token>")
    .getOrCreate()
)

### 5. Verify the configuration of current Spark Context

In [None]:
# Print the Spark settings that matter for this demo so the reader can
# confirm the session picked up the expected configuration.
#
# Matching is by substring: an entry is shown when its key contains any of
# the fragments below (so 'spark.sql.catalog' selects every
# 'spark.sql.catalog.*' key).
essential_configs = [
    'spark.jars.packages', 'spark.executor.instances', 'spark.cores',
    'spark.driver.cores', 'spark.executor.cores', 'spark.memory',
    'spark.driver.memory', 'spark.executor.memory', 'spark.sql.catalog',
    'spark.sql.defaultCatalog', 'spark.master', 'spark.sql.extensions'
]

# The 'spark.sql.catalog' fragment also matches
# 'spark.sql.catalog.<catalog>.token', so the access token could end up in
# the saved notebook output. Values of keys containing any of these markers
# are therefore masked instead of printed.
sensitive_markers = ('token', 'secret', 'password', 'credential')

for key, value in spark.sparkContext.getConf().getAll():
    if not any(fragment in key for fragment in essential_configs):
        continue
    if any(marker in key.lower() for marker in sensitive_markers):
        value = '<redacted>'
    # Printed as a (key, value) tuple, the same shape as before.
    print((key, value))

### 6. Display all the catalogs from the Databricks Unity Catalog

In [None]:
# List the catalogs visible to this session as a quick check that the
# catalog connection works.
catalog_listing = spark.sql("SHOW CATALOGS")
catalog_listing.show()

### 7. Create Iceberg Table in Databricks under the catalog.schema (Example: demo_catalog.customerschema)


In [None]:
%%sql
-- Create the customerschema schema unless it already exists.
CREATE SCHEMA IF NOT EXISTS customerschema;

In [None]:
%%sql
-- Make customerschema the current schema so the statements that follow can
-- use unqualified table names.
USE SCHEMA customerschema;

In [None]:
%%sql
-- Create the Iceberg table used by the demo unless it already exists.
CREATE TABLE IF NOT EXISTS iceberg_customer (
    id STRING,
    name STRING,
    country STRING
)
USING iceberg;


In [None]:
%%sql
-- Add one sample row to the demo table.
INSERT INTO iceberg_customer
VALUES ('1', 'Alice', 'US');

In [None]:
%%sql
-- Read back everything stored in the demo table.
SELECT *
FROM iceberg_customer;