- **Spark master UI:** http://localhost:9090
- **Spark worker a UI:** http://localhost:9091
- **Spark worker b UI:** http://localhost:9092
- **MinIO:** http://localhost:9001
- **Presto:** http://localhost:8000
- **Metabase:** http://localhost:3000
Connect to the Presto CLI and register external tables over the bronze parquet files:

```bash
presto --server localhost:8080 --catalog hive --schema default
```
```sql
CREATE TABLE IF NOT EXISTS Part_Maker (
    part_maker_id INT,
    name VARCHAR(50)
) WITH (
    external_location = 's3a://datalake/bronze/CarPartsDB/Jun-06-2023/Part_Maker',
    format = 'parquet'
);

CREATE TABLE IF NOT EXISTS Part (
    part_id INT,
    brand_id INT,
    supplier_id INT,
    part_group_id INT,
    part_maker_id INT,
    part_name VARCHAR(50),
    main_supplier_name VARCHAR(50),
    price_to_us INT,
    price_to_customer INT
) WITH (
    external_location = 's3a://datalake/bronze/CarPartsDB/Jun-06-2023/Part',
    format = 'parquet'
);

CREATE TABLE IF NOT EXISTS Part_for_Car (
    car_id INT,
    part_id INT
) WITH (
    external_location = 's3a://datalake/bronze/CarPartsDB/Jun-06-2023/Part_for_Car',
    format = 'parquet'
);

CREATE TABLE IF NOT EXISTS Part_Supplier (
    part_supplier_id INT,
    part_id INT,
    supplier_id INT
) WITH (
    external_location = 's3a://datalake/bronze/CarPartsDB/Jun-06-2023/Part_Supplier',
    format = 'parquet'
);

CREATE TABLE IF NOT EXISTS Part_in_Order (
    part_in_order_id INT,
    order_id INT,
    part_supplier_id INT,
    actual_sale_price INT,
    quantity INT
) WITH (
    external_location = 's3a://datalake/bronze/CarPartsDB/Jun-06-2023/Part_in_Order',
    format = 'parquet'
);
```
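Once the tables are registered, a quick sanity check from the same Presto session confirms the parquet files are readable. The query below is illustrative only; it assumes the bronze files for Jun-06-2023 have already been landed by the ingestion job:

```sql
-- Hypothetical sanity check: join parts to their makers and surface the
-- spread between supplier price and customer price (assumes data is loaded).
SELECT
    pm.name                              AS maker,
    p.part_name,
    p.price_to_customer - p.price_to_us AS margin
FROM part p
JOIN part_maker pm ON pm.part_maker_id = p.part_maker_id
ORDER BY margin DESC
LIMIT 10;
```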
Launch Jupyter and point PYTHONPATH at the bundled PySpark and Py4J so the notebooks can import `pyspark`:

```bash
jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser --allow-root

export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:$PYTHONPATH"
```
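With PYTHONPATH set, a notebook can talk to the cluster directly. Below is a minimal sketch, assuming the MinIO endpoint is `http://minio:9000` with the default `minioadmin` credentials; adjust both to match your docker-compose environment:

```python
from pyspark.sql import SparkSession

# Build a session against the standalone master. The s3a settings are
# assumptions -- swap in the endpoint and keys from your own compose file.
spark = (
    SparkSession.builder
    .appName("bronze-exploration")
    .master("spark://master:7077")
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    .config("spark.hadoop.fs.s3a.access.key", "minioadmin")
    .config("spark.hadoop.fs.s3a.secret.key", "minioadmin")
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .getOrCreate()
)

# Read one of the bronze tables written by the ingestion job.
parts = spark.read.parquet("s3a://datalake/bronze/CarPartsDB/Jun-06-2023/Part")
parts.show(5)
```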
Finally, submit the ingestion job to the cluster, with the S3A and JDBC jars on both the driver and executor classpaths:

```bash
docker exec -it master spark-submit --jars /opt/spark/jars/aws-java-sdk-bundle-1.11.375.jar,/opt/spark/jars/hadoop-aws-3.2.0.jar,/opt/spark/jars/postgresql-42.3.5.jar \
  --master spark://master:7077 \
  --deploy-mode client \
  --executor-memory 1G \
  --executor-cores 1 \
  --packages io.delta:delta-core_2.12:1.0.1 \
  --conf "spark.executor.extraClassPath=/opt/spark/jars/aws-java-sdk-bundle-1.11.375.jar:/opt/spark/jars/hadoop-aws-3.2.0.jar:/opt/spark/jars/postgresql-42.3.5.jar" \
  --conf "spark.driver.extraClassPath=/opt/spark/jars/aws-java-sdk-bundle-1.11.375.jar:/opt/spark/jars/hadoop-aws-3.2.0.jar:/opt/spark/jars/postgresql-42.3.5.jar" \
  /opt/workspace/postgres_to_s3.py
```
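The actual `postgres_to_s3.py` lives in the repo's workspace directory; the sketch below is not that script, only an illustration of the pattern the submit command implies (JDBC read from Postgres, parquet write to the bronze layer). Host, database, and credential values are placeholders:

```python
from pyspark.sql import SparkSession

# Illustrative only -- connection details are placeholders, not the repo's.
spark = SparkSession.builder.appName("postgres_to_s3").getOrCreate()

# Pull one source table over JDBC (the postgresql driver jar is supplied
# via --jars on the submit command above).
df = (
    spark.read.format("jdbc")
    .option("url", "jdbc:postgresql://postgres:5432/CarPartsDB")
    .option("dbtable", "Part_Maker")
    .option("user", "postgres")
    .option("password", "postgres")
    .option("driver", "org.postgresql.Driver")
    .load()
)

# Land the table in the bronze layer as parquet, under the dated path
# that the Presto external tables above expect.
df.write.mode("overwrite").parquet(
    "s3a://datalake/bronze/CarPartsDB/Jun-06-2023/Part_Maker"
)
```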