spark手把手:[e2-spk-s03]

231
1

Upload: erhwen-kuo

Post on 14-Apr-2017

188 views

Category:

Engineering


2 download

TRANSCRIPT

1

2

2

2

2

2

2

2

3 . 1

docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:ro \
  --name e2spks03-mysql \
  -e MYSQL_ROOT_PASSWORD=e2spkv01 \
  -d mysql

3 . 6

docker exec -it e2spks03-mysql /bin/bash

mysql -uroot -pe2spkv01

source /e2spkv01/e2-spk-s03/scripts/northwind.sql;

3 . 7

3 . 8

docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:rw \
  -p 8080:8080 \
  --name e2spks03-zeppelin \
  --link e2spks03-mysql:mysql \
  -d dylanmei/zeppelin

3 . 9

3 . 10

3 . 11

4 . 1

4 . 2

4 . 3

4 . 4

4 . 5

4 . 6

4 . 7

4 . 8

4 . 9

4 . 9

4 . 9

4 . 9

4 . 9

4 . 9

4 . 10

4 . 10

4 . 10

4 . 10

4 . 10

4 . 10

4 . 11

4 . 11

4 . 11

4 . 11

4 . 11

"age";"job";"marital";"education";"default";"balance";"housing";"loan";"contact";"day";"month"30;"unemployed";"married";"primary";"no";1787;"no";"no";"cellular";19;"oct";79;1;-1;0;"unknown"33;"services";"married";"secondary";"no";4789;"yes";"yes";"cellular";11;"may";220;1;339;4;"failure"35;"management";"single";"tertiary";"no";1350;"yes";"no";"cellular";16;"apr";185;1;330;1;"failure"30;"management";"married";"tertiary";"no";1476;"yes";"yes";"unknown";3;"jun";199;4;-1;0;"unknown"

4 . 12

4 . 13

5 . 1

5 . 2

5 . 2

5 . 2

5 . 2

5 . 2

5 . 3

5 . 3

5 . 3

5 . 4

5 . 4

5 . 5

5 . 5

5 . 6

5 . 6

5 . 8

5 . 9

5 . 10

5 . 11

val df_case01 = df.groupBy("A", "B").pivot("C").sum("D")
z.show(df_case01) // use zeppelin to show the result

5 . 13

5 . 14

5 . 14

5 . 17

5 . 20

5 . 20

5 . 22

5 . 23

5 . 23

5 . 23

5 . 23

df.groupBy("A","B").pivot("C").sum("D").show()

5 . 23

df.groupBy("A","B").pivot("C").sum("D").show()

df.groupBy("A","B").pivot("C",Seq("small","large")).sum("D").show()

5 . 23

5 . 24

5 . 24

df.groupBy("A","B").pivot("C").agg(sum("D"),avg("D")).show

5 . 24

5 . 25

5 . 25

df.withColumn("p", concat($"p1", $"p2")).groupBy("a", "b").pivot("p").agg(...)

5 . 25

5 . 26

5 . 26

5 . 26

df.withColumn("p", concat($"p1", $"p2")).groupBy("a", "b").pivot("p").agg(...)

5 . 26

6 . 1

6 . 2

6 . 2

6 . 2

6 . 3

6 . 3

6 . 3

6 . 3

6 . 3

6 . 3

6 . 4

6 . 4

6 . 4

6 . 4

6 . 4

6 . 4

6 . 5

6 . 5

6 . 5

6 . 6

6 . 6

6 . 6

6 . 6

6 . 7

6 . 7

6 . 8

6 . 8

6 . 9

6 . 9

6 . 10

6 . 10

6 . 11

6 . 12

6 . 13

7 . 1

7 . 2

7 . 2

7 . 2

7 . 2

7 . 2

7 . 4

7 . 4

7 . 4

7 . 4

7 . 5

7 . 5

%psql
show tables

7 . 5

%psql
show tables

7 . 5

7 . 6

7 . 6

7 . 6

7 . 6

7 . 6

7 . 6

7 . 7

7 . 7

7 . 7

7 . 7

7 . 8

7 . 9

import org.apache.spark.sql.SaveMode

val jdbcUrl = "jdbc:mysql://e2spks03-mysql:3306/northwind?user=root&password=e2spkv01"
val outDataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// JDBC Tables
val nw_tables = List("Categories", "CustomerCustomerDemo", "CustomerDemographics", "Customers", "Employees", "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers")

// DataFrame "JDBC"
nw_tables.foreach(table => {
  val df = sqlContext.read.format("jdbc")
    .option("url", jdbcUrl)
    .option("dbtable", table)
    .option("driver", "com.mysql.jdbc.Driver")
    .option("fetchSize", "1000")

7 . 10

val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"
val nw_parquets = List("Categories", "Customers", "Employees", "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers", "Suppliers")

nw_parquets.foreach(nw_parquet => {
  val df = sqlContext.read.format("parquet").load(in_DataFolder + "/" + nw_parquet)
  // DataFrame schema stdout
  df.printSchema()
  // DataFrame stdout
  df.show()
})

7 . 12

7 . 13

7 . 13

7 . 14

8 . 1

8 . 2

8 . 2

8 . 2

8 . 2

val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// Parquets
val nw_parquets = List("Categories", "Customers", "Employees", "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers", "Suppliers")

// DataFrame "Parquet"
nw_parquets.foreach(nw_parquet => {
  sqlContext.read.format("parquet").load(in_DataFolder + "/" + nw_parquet).registerTempTable(nw_parquet)
})

8 . 3

8 . 5

8 . 5

8 . 6

8 . 6

8 . 6

8 . 6

8 . 6

8 . 6

8 . 7

8 . 7

8 . 7

8 . 7

8 . 7

8 . 7

8 . 7

9