CHAPTER 9: Implementing MapReduce with Hadoop


Outline
- Development environment setup
- Creating a new project
- MapReduce program structure
- Basic MapReduce example
- Advanced MapReduce example
- Hadoop MapReduce project topics

Development Environment Setup

A Hadoop development environment can be set up in two ways: without an IDE, or with an Integrated Development Environment (IDE) such as Eclipse.

Without an IDE (1/2)
- Edit /etc/profile so that the Java CLASSPATH includes the Hadoop core jar:
  ~# vi /etc/profile
  CLASSPATH=/opt/hadoop/hadoop-0.20.2-core.jar
  export CLASSPATH
- Reload the profile:
  ~# source /etc/profile
- Compile the Java source into class files:
  ~# javac [program].java

Without an IDE (2/2)
- Package the compiled class files into a jar:
  ~# jar cvf [jar name].jar [class name].class
- Run the jar on Hadoop:
  /hadoop# bin/hadoop jar [jar name].jar [main class] [argument 0] [argument 1]

Using an IDE (Eclipse) (1/2)
- Download Eclipse from http://www.eclipse.org; the examples use Eclipse Classic 3.6.2 for Linux, installed under /opt/eclipse.

Using an IDE (Eclipse) (2/2)
- Download and unpack Eclipse, then move it to /opt and link it into /usr/local/bin:
  ~# wget http://ftp.cs.pu.edu.tw/pub/eclipse/eclipse/downloads/drops/R-3.6.2-201102101200/eclipse-SDK-3.6.2-linux-gtk.tar.gz
  ~# tar zxvf eclipse-SDK-3.6.2-linux-gtk.tar.gz
  ~# mv eclipse /opt/
  ~# ln -sf /opt/eclipse/eclipse /usr/local/bin/
- Copy the Hadoop Eclipse plugin from /opt/hadoop into the Eclipse plugins directory:
  ~# cp /opt/hadoop/contrib/eclipse-plugin/hadoop-0.20.2-eclipse-plugin.jar /opt/eclipse/plugins/
- Start Eclipse:
  ~# eclipse &
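To make the compile, package, and run sequence concrete, here is a minimal, hypothetical class that is not part of the original slides (the class name HelloHDFS and the listed path are assumptions); it could be compiled with javac against hadoop-0.20.2-core.jar, packaged with jar cvf, and launched with bin/hadoop jar as shown above:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  // Hypothetical example (not from the slides): lists the entries under the
  // HDFS root directory using the cluster configuration picked up by Hadoop.
  public class HelloHDFS {
      public static void main(String[] args) throws Exception {
          Configuration conf = new Configuration();
          FileSystem hdfs = FileSystem.get(conf);
          for (FileStatus status : hdfs.listStatus(new Path("/"))) {
              System.out.println(status.getPath().toString());
          }
      }
  }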

Creating a New Project

(A sequence of fifteen screenshots, (1/15) through (15/15), walks through creating a new Hadoop MapReduce project in Eclipse; the screenshots contain no transcribable text.)

MapReduce Program Structure

A complete MapReduce program consists of three parts:
- MapReduce Driver: configures and submits the MapReduce job.
- Mapper: processes input key/value pairs and emits intermediate key/value pairs.
- Reducer: receives each intermediate key together with all of its values and aggregates them into the final result.

MapReduce Driver (template)

class MapReduceDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "[job name]");
        job.setJarByClass(MapReduceDriver.class);
        job.setMapperClass([Mapper class]);
        job.setReducerClass([Reducer class]);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // other job settings
        job.waitForCompletion(true);
    }
}

Mapper (template)

class [Mapper class] extends Mapper<[input key], [input value], [output key], [output value]> {
    public void map([input key] key, [input value] value, Context context)
            throws IOException, InterruptedException {
        // Map processing
        context.write(IntermediateKey, IntermediateValue);
    }
}

Reducer (template)

class [Reducer class] extends Reducer<[input key], [input value], [output key], [output value]> {
    public void reduce([input key] key, Iterable<[input value]> values, Context context)
            throws IOException, InterruptedException {
        // Reduce processing
        context.write(ResultKey, ResultValue);
    }
}

Basic MapReduce Example

Basic example (1/2): This example uses Eclipse to develop a MapReduce program named maxCPU. The program reads a CPU utilization log stored on HDFS and finds the maximum CPU utilization for each day.

Basic example (2/2): Each record of the CPU utilization log contains the date, the time, and the CPU utilization, for example:

2011/01/01 00:00 40
2011/01/01 01:00 30
2011/01/02 22:00 40
2011/01/02 23:00 30

Mapper implementation (1/3): (screenshot)

Mapper implementation (2/3): In the Hadoop Lab project, add a class named mymapper.java under the package MR_Lab:

public class mymapper extends Mapper<Object, Text, Text, IntWritable> {
    private Text tday = new Text();
    private IntWritable idata = new IntWritable();

    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String day = line.substring(0, 10);   // the date field, e.g. "2011/01/01"
        String data = line.substring(17);     // the CPU utilization value
        tday.set(day);
        idata.set(Integer.valueOf(data));
        context.write(tday, idata);           // emit (date, CPU utilization)
    }
}

Mapper implementation (3/3): (screenshot)
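As a quick illustration of the substring offsets used by mymapper (this walkthrough is not on the original slides), parsing the first sample record yields the date key and the integer CPU value:

  // Standalone sketch: shows how mymapper splits one sample record
  // into the intermediate key and value.
  public class ParseDemo {
      public static void main(String[] args) {
          String line = "2011/01/01 00:00 40";     // one record from the sample CPU log
          String day = line.substring(0, 10);      // "2011/01/01" - characters 0 to 9
          String data = line.substring(17);        // "40" - everything after the time field
          System.out.println(day + " -> " + Integer.valueOf(data));
          // mymapper would emit the intermediate pair ("2011/01/01", 40) for this line
      }
  }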

Reducer implementation (1/3): (screenshot)

Reducer implementation (2/3): In the same MR_Lab package, add a class named myreducer.java:

public class myreducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable cpuUtil = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable val : values) {
            maxValue = Math.max(maxValue, val.get());   // keep the largest value seen for this day
        }
        cpuUtil.set(maxValue);
        context.write(key, cpuUtil);                    // emit (date, maximum CPU utilization)
    }
}

Reducer implementation (3/3): (screenshot)
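A side note that is not on the original slides: Hadoop typically reuses the same IntWritable object while iterating over the values handed to reduce(), so the number must be copied out with get(), exactly as myreducer does, rather than by keeping references to the objects. A small standalone sketch of the difference, assuming Hadoop's IntWritable is on the classpath:

  import java.util.ArrayList;
  import java.util.List;
  import org.apache.hadoop.io.IntWritable;

  // Hypothetical sketch: simulates how the framework reuses one IntWritable
  // instance while feeding values to reduce().
  public class ReuseDemo {
      public static void main(String[] args) {
          IntWritable reused = new IntWritable();            // stands in for Hadoop's reused value object
          int[] rawValues = {40, 30};                        // the 2011/01/01 values from the sample log
          List<IntWritable> storedObjects = new ArrayList<IntWritable>();
          int maxValue = Integer.MIN_VALUE;
          for (int v : rawValues) {
              reused.set(v);                                 // the framework overwrites the same object
              storedObjects.add(reused);                     // wrong: every entry is the same instance
              maxValue = Math.max(maxValue, reused.get());   // right: copy the primitive, as myreducer does
          }
          System.out.println(storedObjects);                 // prints [30, 30] - the last value twice
          System.out.println(maxValue);                      // prints 40, the correct per-day maximum
      }
  }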

MapReduce Driver implementation (1/4): (screenshot)

MapReduce Driver implementation (2/4 and 3/4): In the MR_Lab package, add the driver class maxCPU.java:

public class maxCPU {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: maxCPU <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "max CPU");
        job.setJarByClass(maxCPU.class);
        job.setMapperClass(mymapper.class);
        job.setCombinerClass(myreducer.class);
        job.setReducerClass(myreducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean status = job.waitForCompletion(true);
        if (status) {
            System.exit(0);
        } else {
            System.err.print("Not Complete!");
            System.exit(1);
        }
    }
}

MapReduce Driver implementation (4/4): (screenshot)
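One design choice worth spelling out (this remark is not on the original slides): the driver registers myreducer as both the combiner and the reducer. That is safe here because taking a maximum is associative and commutative, so combining partial results on each map task and then reducing the partial maxima gives the same answer as reducing all values at once. A plain-Java sketch of that equivalence, using hypothetical per-task values:

  // Hypothetical sketch: shows why using myreducer as the combiner does not change the result.
  public class CombinerDemo {
      public static void main(String[] args) {
          int[] mapTaskA = {40, 30};                     // values one map task might emit for a day
          int[] mapTaskB = {100, 90};                    // values another map task might emit

          // With a combiner: each map task reduces its own values locally first.
          int withCombiner = Math.max(max(mapTaskA), max(mapTaskB));

          // Without a combiner: the reducer sees every value directly.
          int withoutCombiner = max(new int[] {40, 30, 100, 90});

          System.out.println(withCombiner + " == " + withoutCombiner);   // both are 100
      }

      static int max(int[] values) {
          int maxValue = Integer.MIN_VALUE;
          for (int v : values) {
              maxValue = Math.max(maxValue, v);
          }
          return maxValue;
      }
  }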

Running the Hadoop MapReduce program (1/3): (screenshot)

Running the Hadoop MapReduce program (2/3): (screenshot)

Running the Hadoop MapReduce program (3/3): After the job completes, the output directory on HDFS contains the maximum CPU utilization of each day:

2011/01/01 100
2011/01/02 90
2011/01/03 80
2011/01/04 30

Advanced MapReduce Example

The advanced example extends the basic example (maxCPU) so that the data is stored in HBase as well as on HDFS.

(Flow diagram: numbered steps 1 to 7 showing how the data moves between the local host, the Mapper and Reducer, HDFS, and HBase.)

maxCPU (1/2 and 2/2): The MapReduce Driver of the advanced example is again a class named maxCPU; compared with the basic version it adds HDFS and HBase housekeeping around the job:

public class maxCPU {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: maxCPU <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "max CPU");
        job.setJarByClass(maxCPU.class);
        job.setMapperClass(mymapper.class);
        job.setCombinerClass(myreducer.class);
        job.setReducerClass(myreducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // prepare HDFS and HBase before the job runs: remove stale directories,
        // upload the local log to HDFS, and re-create the CPU table
        CheckDir.check(otherArgs[0].toString(), conf);
        LocalToHdfs.localToHdfs(otherArgs[0].toString(), otherArgs[0].toString(), conf);
        CheckDir.check(otherArgs[1].toString(), conf);
        CheckTable.check("CPU");
        CheckTable.addFamily("CPU", "CPUUtil");
        boolean status = job.waitForCompletion(true);
        if (status) {
            OutputResult.output(otherArgs[1].toString(), conf);
            System.exit(0);
        } else {
            System.err.print("Not Complete!");
            System.exit(1);
        }
    }
}

mymapper: The Mapper is extended from the basic version. Besides the date it also extracts the time field, and every record is written into HBase through the AddData class:

public class mymapper extends Mapper<Object, Text, Text, IntWritable> {
    private Text tday = new Text();
    private IntWritable idata = new IntWritable();

    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String day = line.substring(0, 10);
        String time = line.substring(11, 16);
        String data = line.substring(17);
        try {
            // store the raw record in the HBase table "CPU", column family "CPUUtil"
            AddData.add("CPU", "CPUUtil", day + " " + time, data);
        } catch (Exception e) {
            System.err.print("ERROR! (add data to HBase)");
        }
        tday.set(day);
        idata.set(Integer.valueOf(data));
        context.write(tday, idata);
    }
}

AddData: writes one record into the given HBase table and column family, using the date and time as the row key:

public class AddData {
    public static Configuration configuration = null;
    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.master", "Host01:60000");
        configuration.set("hbase.zookeeper.quorum", "Host01,Host02");
        configuration.set("hbase.zookeeper.property.clientPort", "2222");
    }

    static void add(String table, String family, String dtime, String data) throws Exception {
        HTable htable = new HTable(configuration, table);
        Put row = new Put(dtime.getBytes());
        row.add(family.getBytes(), new String("data").getBytes(), data.getBytes());
        htable.put(row);
        htable.flushCommits();
    }
}

myreducer: The Reducer is the same as in the basic example:

public class myreducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable cpuUtil = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable val : values) {
            maxValue = Math.max(maxValue, val.get());
        }
        cpuUtil.set(maxValue);
        context.write(key, cpuUtil);
    }
}

CheckDir: checks whether a path already exists on HDFS and deletes it if it does:

public class CheckDir {
    static void check(final String path, Configuration conf) {
        Path dstPath = new Path(path);
        try {
            FileSystem hdfs = dstPath.getFileSystem(conf);
            if (hdfs.exists(dstPath)) {
                hdfs.delete(dstPath, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

LocalToHdfs: copies the local path src to the HDFS path dst:

public class LocalToHdfs {
    static void localToHdfs(String src, String dst, Configuration conf) {
        Path dstPath = new Path(dst);
        try {
            FileSystem hdfs = dstPath.getFileSystem(conf);
            hdfs.copyFromLocalFile(false, new Path(src), new Path(dst));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

CheckTable (1/2 and 2/2): The check() method, similar to CheckDir, deletes the HBase table if it already exists; the addFamily() method creates the table with the given column family:

public class CheckTable {
    public static Configuration configuration = null;
    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.master", "Host01:60000");
        configuration.set("hbase.zookeeper.quorum", "Host01,Host02");
        configuration.set("hbase.zookeeper.property.clientPort", "2222");
    }

    public static void check(String table) throws Exception {
        HBaseAdmin admin = new HBaseAdmin(configuration);
        if (admin.tableExists(table)) {
            System.out.println("delete the table ");
            admin.disableTable(table);
            admin.deleteTable(table);
        }
    }

    public static void addFamily(String table, String family) throws Exception {
        HBaseAdmin admin = new HBaseAdmin(configuration);
        HTableDescriptor tableDescripter = new HTableDescriptor(table.getBytes());
        tableDescripter.addFamily(new HColumnDescriptor(family));
        admin.createTable(tableDescripter);
    }
}

OutputResult (1/2 and 2/2): reads the per-day maxima produced by myreducer from the HDFS output and, for every day/value pair, asks ScanTable to look up the matching rows in the CPU table:

public class OutputResult {
    static void output(final String path, Configuration conf) {
        Path dst_path = new Path(path);
        String day = null;
        String value = null;
        try {
            FileSystem hdfs = dst_path.getFileSystem(conf);
            FSDataInputStream in = null;
            if (hdfs.exists(dst_path)) {
                in = hdfs.open(new Path(dst_path.toString() + "/part-r-00000"));
                String messagein = null;
                while ((messagein = in.readLine()) != null) {
                    StringTokenizer itr = new StringTokenizer(messagein);
                    day = itr.nextToken();
                    value = itr.nextToken();
                    ScanTable.setFilter("CPU", day, value);
                }
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

ScanTable (1/2 and 2/2): scans the CPU table in HBase for the rows of the given day whose value equals the maximum found by myreducer, and prints them:

public class ScanTable {
    public static Configuration configuration = null;
    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.master", "Host01:60000");
        configuration.set("hbase.zookeeper.quorum", "Host01,Host02");
        configuration.set("hbase.zookeeper.property.clientPort", "2222");
    }

    public static void setFilter(String tablename, String day, String value) throws IOException {
        HTable table = new HTable(configuration, tablename);
        Scan scan = new Scan((day + " 00:00").getBytes(), (day + " 23:00").getBytes());
        FilterList filterList = new FilterList();
        filterList.addFilter(new SingleColumnValueFilter("CPUUtil".getBytes(), "data".getBytes(),
                CompareOp.EQUAL, value.getBytes()));
        scan.setFilter(filterList);
        ResultScanner ResultScannerFilterList = table.getScanner(scan);
        for (Result rs = ResultScannerFilterList.next(); rs != null; rs = ResultScannerFilterList.next()) {
            for (KeyValue kv : rs.list()) {
                System.out.println(new String(kv.getRow()) + " " + new String(kv.getValue()));
            }
        }
    }
}

Program output:

2011/01/01 16:00 100
2011/01/01 17:00 100
2011/01/02 15:00 90
2011/01/03 16:00 80
2011/01/03 17:00 80
2011/01/03 18:00 80
2011/01/04 00:00 40

Hadoop MapReduce Project Topics

Suggested directions for a Hadoop MapReduce project include processing and analysing data with Hadoop MapReduce, storing data on HDFS and adding HDFS nodes, combining HDFS with HBase, and exploring related projects in the Hadoop ecosystem such as Avro, Pig, Hive, and Chukwa.