spark手把手:[e2-spk-s03]
TRANSCRIPT
3 . 2
3 . 5
3 . 5
# Start the MySQL container in the background, with the course folder mounted read-only.
docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:ro \
  --name e2spks03-mysql \
  -e MYSQL_ROOT_PASSWORD=e2spkv01 \
  -d mysql
3 . 6
# Open an interactive shell inside the MySQL container.
docker exec -it e2spks03-mysql /bin/bash
# Inside the container: start the MySQL client as root.
mysql -uroot -pe2spkv01
# At the mysql prompt: run the northwind SQL script from the mounted folder.
source /e2spkv01/e2-spk-s03/scripts/northwind.sql;
3 . 7
# Start the Zeppelin container in the background: course folder mounted read-write,
# port 8080 published, and the MySQL container linked under the alias "mysql".
docker run -v $HOME/docker/spark/e2spkv01:/e2spkv01:rw \
  -p 8080:8080 \
  --name e2spks03-zeppelin \
  --link e2spks03-mysql:mysql \
  -d dylanmei/zeppelin
3 . 9
"age";"job";"marital";"education";"default";"balance";"housing";"loan";"contact";"day";"month"
30;"unemployed";"married";"primary";"no";1787;"no";"no";"cellular";19;"oct";79;1;-1;0;"unknown"
33;"services";"married";"secondary";"no";4789;"yes";"yes";"cellular";11;"may";220;1;339;4;"failure"
35;"management";"single";"tertiary";"no";1350;"yes";"no";"cellular";16;"apr";185;1;330;1;"failure"
30;"management";"married";"tertiary";"no";1476;"yes";"yes";"unknown";3;"jun";199;4;-1;0;"unknown"
4 . 12
4 . 14
4 . 15
4 . 16
4 . 17
4 . 18
5 . 12
5 . 13
5 . 13
// Group by "A" and "B", pivot on column "C", and sum column "D" for each pivoted value.
val df_case01 = df.groupBy("A", "B").pivot("C").sum("D")
z.show(df_case01) // use zeppelin to show the result
5 . 13
5 . 15
5 . 15
5 . 15
5 . 15
5 . 15
5 . 15
5 . 16
5 . 16
5 . 16
5 . 16
5 . 16
5 . 18
5 . 19
5 . 19
5 . 19
5 . 19
5 . 19
5 . 21
5 . 21
5 . 21
5 . 21
5 . 21
5 . 21
// Pivot on column "C" without listing its values, summing "D" per ("A", "B") group.
df.groupBy("A", "B").pivot("C").sum("D").show()

// Same aggregation, but with the pivot values of "C" given explicitly.
df.groupBy("A", "B").pivot("C", Seq("small", "large")).sum("D").show()
5 . 23
7 . 3
7 . 3
7 . 8
7 . 8
7 . 8
7 . 10
import org.apache.spark.sql.SaveMode

val jdbcUrl = "jdbc:mysql://e2spks03-mysql:3306/northwind?user=root&password=e2spkv01"
val outDataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// JDBC tables to read.
// NOTE(review): the transcript cuts this list off after "Shippers" (the closing parenthesis was
// missing); further table names may have been lost -- confirm against the original slide.
val nw_tables = List(
  "Categories", "CustomerCustomerDemo", "CustomerDemographics", "Customers", "Employees",
  "EmployeeTerritories", "OrderDetails", "Orders", "Region", "Products", "Shippers")

// For each table, configure a reader over the "JDBC" data source.
nw_tables.foreach(table => {
  val df = sqlContext.read.format("jdbc")
    .option("url", jdbcUrl)
    .option("dbtable", table)
    .option("driver", "com.mysql.jdbc.Driver")
    .option("fetchSize", "1000")
  // NOTE(review): the snippet is truncated at this point in the transcript; the rest of the
  // chain, whatever uses SaveMode and outDataFolder, and the closing "})" are not visible.
7 . 10
7 . 11
7 . 11
7 . 12
// Folder that holds one parquet dataset per Northwind table.
val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// Names of the parquet datasets to read.
val nw_parquets = List(
  "Categories", "Customers", "Employees", "EmployeeTerritories", "OrderDetails",
  "Orders", "Region", "Products", "Shippers", "Suppliers")

nw_parquets.foreach { nw_parquet =>
  val df = sqlContext.read.format("parquet").load(in_DataFolder + "/" + nw_parquet)

  // Print the DataFrame schema to stdout.
  df.printSchema()

  // Print the DataFrame contents to stdout.
  df.show()
}
7 . 12
8 . 3
// Folder that holds one parquet dataset per Northwind table.
val in_DataFolder = "file:///e2spkv01/e2-spk-s03/datas/northwind"

// Parquet datasets to load.
val nw_parquets = List(
  "Categories", "Customers", "Employees", "EmployeeTerritories", "OrderDetails",
  "Orders", "Region", "Products", "Shippers", "Suppliers")

// Load each "Parquet" dataset as a DataFrame and register it as a temp table under the
// dataset's own name.
nw_parquets.foreach { nw_parquet =>
  sqlContext.read.format("parquet")
    .load(in_DataFolder + "/" + nw_parquet)
    .registerTempTable(nw_parquet)
}
8 . 3
8 . 4
8 . 4