CHAPTER 9: Implementing MapReduce with Hadoop


Outline
- Development environment setup
- Creating a new project
- MapReduce program structure
- Basic MapReduce example
- Advanced MapReduce example
- Hadoop MapReduce project topics

Development Environment Setup

A Hadoop development environment can be set up in two ways: without an IDE, or with an Integrated Development Environment (IDE) such as Eclipse.

Without an IDE (1/2)
- Edit /etc/profile so that the Java CLASSPATH includes the Hadoop core jar:
  ~# vi /etc/profile
  CLASSPATH=/opt/hadoop/hadoop-0.20.2-core.jar
  export CLASSPATH
- Reload the profile:
  ~# source /etc/profile
- Compile the Java source into class files:
  ~# javac [program].java

Without an IDE (2/2)
- Package the compiled class files into a jar:
  ~# jar cvf [jar name].jar [class name].class
- Run the jar on Hadoop:
  /hadoop# bin/hadoop jar [jar name].jar [main class] [argument 0] [argument 1]

Using an IDE (Eclipse) (1/2)
- Download Eclipse from http://www.eclipse.org; the examples use Eclipse Classic 3.6.2 for Linux, installed under /opt/eclipse.

Using an IDE (Eclipse) (2/2)
- Download and unpack Eclipse, then move it to /opt and link it into /usr/local/bin:
  ~# wget http://ftp.cs.pu.edu.tw/pub/eclipse/eclipse/downloads/drops/R-3.6.2-201102101200/eclipse-SDK-3.6.2-linux-gtk.tar.gz
  ~# tar zxvf eclipse-SDK-3.6.2-linux-gtk.tar.gz
  ~# mv eclipse /opt/
  ~# ln -sf /opt/eclipse/eclipse /usr/local/bin/
- Copy the Hadoop Eclipse plugin from /opt/hadoop into the Eclipse plugins directory:
  ~# cp /opt/hadoop/contrib/eclipse-plugin/hadoop-0.20.2-eclipse-plugin.jar /opt/eclipse/plugins/
- Start Eclipse:
  ~# eclipse &
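To make the compile, package, and run sequence concrete, here is a minimal, hypothetical class that is not part of the original slides (the class name HelloHDFS and the listed path are assumptions); it could be compiled with javac against hadoop-0.20.2-core.jar, packaged with jar cvf, and launched with bin/hadoop jar as shown above:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  // Hypothetical example (not from the slides): lists the entries under the
  // HDFS root directory using the cluster configuration picked up by Hadoop.
  public class HelloHDFS {
      public static void main(String[] args) throws Exception {
          Configuration conf = new Configuration();
          FileSystem hdfs = FileSystem.get(conf);
          for (FileStatus status : hdfs.listStatus(new Path("/"))) {
              System.out.println(status.getPath().toString());
          }
      }
  }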

Creating a New Project

(A sequence of fifteen screenshots, (1/15) through (15/15), walks through creating a new Hadoop MapReduce project in Eclipse; the screenshots contain no transcribable text.)

MapReduce Program Structure

A complete MapReduce program consists of three parts:
- MapReduce Driver: configures and submits the MapReduce job.
- Mapper: processes input key/value pairs and emits intermediate key/value pairs.
- Reducer: receives each intermediate key together with all of its values and aggregates them into the final result.

MapReduce Driver (template)

class MapReduceDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "[job name]");
        job.setJarByClass(MapReduceDriver.class);
        job.setMapperClass([Mapper class]);
        job.setReducerClass([Reducer class]);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // other job settings
        job.waitForCompletion(true);
    }
}

Mapper (template)

class [Mapper class] extends Mapper<[input key], [input value], [output key], [output value]> {
    public void map([input key] key, [input value] value, Context context)
            throws IOException, InterruptedException {
        // Map processing
        context.write(IntermediateKey, IntermediateValue);
    }
}

Reducer (template)

class [Reducer class] extends Reducer<[input key], [input value], [output key], [output value]> {
    public void reduce([input key] key, Iterable<[input value]> values, Context context)
            throws IOException, InterruptedException {
        // Reduce processing
        context.write(ResultKey, ResultValue);
    }
}

Basic MapReduce Example

Basic example (1/2): This example uses Eclipse to develop a MapReduce program named maxCPU. The program reads a CPU utilization log stored on HDFS and finds the maximum CPU utilization for each day.

Basic example (2/2): Each record of the CPU utilization log contains the date, the time, and the CPU utilization, for example:

2011/01/01 00:00 40
2011/01/01 01:00 30
2011/01/02 22:00 40
2011/01/02 23:00 30

Mapper implementation (1/3): (screenshot)

Mapper implementation (2/3): In the Hadoop Lab project, add a class named mymapper.java under the package MR_Lab:

public class mymapper extends Mapper<Object, Text, Text, IntWritable> {
    private Text tday = new Text();
    private IntWritable idata = new IntWritable();

    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String day = line.substring(0, 10);   // the date field, e.g. "2011/01/01"
        String data = line.substring(17);     // the CPU utilization value
        tday.set(day);
        idata.set(Integer.valueOf(data));
        context.write(tday, idata);           // emit (date, CPU utilization)
    }
}

Mapper implementation (3/3): (screenshot)
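As a quick illustration of the substring offsets used by mymapper (this walkthrough is not on the original slides), parsing the first sample record yields the date key and the integer CPU value:

  // Standalone sketch: shows how mymapper splits one sample record
  // into the intermediate key and value.
  public class ParseDemo {
      public static void main(String[] args) {
          String line = "2011/01/01 00:00 40";     // one record from the sample CPU log
          String day = line.substring(0, 10);      // "2011/01/01" - characters 0 to 9
          String data = line.substring(17);        // "40" - everything after the time field
          System.out.println(day + " -> " + Integer.valueOf(data));
          // mymapper would emit the intermediate pair ("2011/01/01", 40) for this line
      }
  }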

Reducer implementation (1/3): (screenshot)

Reducer implementation (2/3): In the same MR_Lab package, add a class named myreducer.java:

public class myreducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable cpuUtil = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable val : values) {
            maxValue = Math.max(maxValue, val.get());   // keep the largest value seen for this day
        }
        cpuUtil.set(maxValue);
        context.write(key, cpuUtil);                    // emit (date, maximum CPU utilization)
    }
}

Reducer implementation (3/3): (screenshot)
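A side note that is not on the original slides: Hadoop typically reuses the same IntWritable object while iterating over the values handed to reduce(), so the number must be copied out with get(), exactly as myreducer does, rather than by keeping references to the objects. A small standalone sketch of the difference, assuming Hadoop's IntWritable is on the classpath:

  import java.util.ArrayList;
  import java.util.List;
  import org.apache.hadoop.io.IntWritable;

  // Hypothetical sketch: simulates how the framework reuses one IntWritable
  // instance while feeding values to reduce().
  public class ReuseDemo {
      public static void main(String[] args) {
          IntWritable reused = new IntWritable();            // stands in for Hadoop's reused value object
          int[] rawValues = {40, 30};                        // the 2011/01/01 values from the sample log
          List<IntWritable> storedObjects = new ArrayList<IntWritable>();
          int maxValue = Integer.MIN_VALUE;
          for (int v : rawValues) {
              reused.set(v);                                 // the framework overwrites the same object
              storedObjects.add(reused);                     // wrong: every entry is the same instance
              maxValue = Math.max(maxValue, reused.get());   // right: copy the primitive, as myreducer does
          }
          System.out.println(storedObjects);                 // prints [30, 30] - the last value twice
          System.out.println(maxValue);                      // prints 40, the correct per-day maximum
      }
  }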

MapReduce Driver implementation (1/4): (screenshot)

MapReduce Driver implementation (2/4 and 3/4): In the MR_Lab package, add the driver class maxCPU.java:

public class maxCPU {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: maxCPU <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "max CPU");
        job.setJarByClass(maxCPU.class);
        job.setMapperClass(mymapper.class);
        job.setCombinerClass(myreducer.class);
        job.setReducerClass(myreducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean status = job.waitForCompletion(true);
        if (status) {
            System.exit(0);
        } else {
            System.err.print("Not Complete!");
            System.exit(1);
        }
    }
}

MapReduce Driver implementation (4/4): (screenshot)
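One design choice worth spelling out (this remark is not on the original slides): the driver registers myreducer as both the combiner and the reducer. That is safe here because taking a maximum is associative and commutative, so combining partial results on each map task and then reducing the partial maxima gives the same answer as reducing all values at once. A plain-Java sketch of that equivalence, using hypothetical per-task values:

  // Hypothetical sketch: shows why using myreducer as the combiner does not change the result.
  public class CombinerDemo {
      public static void main(String[] args) {
          int[] mapTaskA = {40, 30};                     // values one map task might emit for a day
          int[] mapTaskB = {100, 90};                    // values another map task might emit

          // With a combiner: each map task reduces its own values locally first.
          int withCombiner = Math.max(max(mapTaskA), max(mapTaskB));

          // Without a combiner: the reducer sees every value directly.
          int withoutCombiner = max(new int[] {40, 30, 100, 90});

          System.out.println(withCombiner + " == " + withoutCombiner);   // both are 100
      }

      static int max(int[] values) {
          int maxValue = Integer.MIN_VALUE;
          for (int v : values) {
              maxValue = Math.max(maxValue, v);
          }
          return maxValue;
      }
  }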

Running the Hadoop MapReduce program (1/3): (screenshot)

Running the Hadoop MapReduce program (2/3): (screenshot)

Running the Hadoop MapReduce program (3/3): After the job completes, the output directory on HDFS contains the maximum CPU utilization of each day:

2011/01/01 100
2011/01/02 90
2011/01/03 80
2011/01/04 30

Advanced MapReduce Example

The advanced example extends the basic example (maxCPU) so that the data is stored in HBase as well as on HDFS.

(Flow diagram: numbered steps 1 to 7 showing how the data moves between the local host, the Mapper and Reducer, HDFS, and HBase.)

maxCPU (1/2 and 2/2): The MapReduce Driver of the advanced example is again a class named maxCPU; compared with the basic version it adds HDFS and HBase housekeeping around the job:

public class maxCPU {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: maxCPU <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "max CPU");
        job.setJarByClass(maxCPU.class);
        job.setMapperClass(mymapper.class);
        job.setCombinerClass(myreducer.class);
        job.setReducerClass(myreducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // prepare HDFS and HBase before the job runs: remove stale directories,
        // upload the local log to HDFS, and re-create the CPU table
        CheckDir.check(otherArgs[0].toString(), conf);
        LocalToHdfs.localToHdfs(otherArgs[0].toString(), otherArgs[0].toString(), conf);
        CheckDir.check(otherArgs[1].toString(), conf);
        CheckTable.check("CPU");
        CheckTable.addFamily("CPU", "CPUUtil");
        boolean status = job.waitForCompletion(true);
        if (status) {
            OutputResult.output(otherArgs[1].toString(), conf);
            System.exit(0);
        } else {
            System.err.print("Not Complete!");
            System.exit(1);
        }
    }
}

mymapper: The Mapper is extended from the basic version. Besides the date it also extracts the time field, and every record is written into HBase through the AddData class:

public class mymapper extends Mapper<Object, Text, Text, IntWritable> {
    private Text tday = new Text();
    private IntWritable idata = new IntWritable();

    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String day = line.substring(0, 10);
        String time = line.substring(11, 16);
        String data = line.substring(17);
        try {
            // store the raw record in the HBase table "CPU", column family "CPUUtil"
            AddData.add("CPU", "CPUUtil", day + " " + time, data);
        } catch (Exception e) {
            System.err.print("ERROR! (add data to HBase)");
        }
        tday.set(day);
        idata.set(Integer.valueOf(data));
        context.write(tday, idata);
    }
}

AddData: writes one record into the given HBase table and column family, using the date and time as the row key:

public class AddData {
    public static Configuration configuration = null;
    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.master", "Host01:60000");
        configuration.set("hbase.zookeeper.quorum", "Host01,Host02");
        configuration.set("hbase.zookeeper.property.clientPort", "2222");
    }

    static void add(String table, String family, String dtime, String data) throws Exception {
        HTable htable = new HTable(configuration, table);
        Put row = new Put(dtime.getBytes());
        row.add(family.getBytes(), new String("data").getBytes(), data.getBytes());
        htable.put(row);
        htable.flushCommits();
    }
}

myreducer: The Reducer is the same as in the basic example:

public class myreducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable cpuUtil = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable val : values) {
            maxValue = Math.max(maxValue, val.get());
        }
        cpuUtil.set(maxValue);
        context.write(key, cpuUtil);
    }
}

CheckDir: checks whether a path already exists on HDFS and deletes it if it does:

public class CheckDir {
    static void check(final String path, Configuration conf) {
        Path dstPath = new Path(path);
        try {
            FileSystem hdfs = dstPath.getFileSystem(conf);
            if (hdfs.exists(dstPath)) {
                hdfs.delete(dstPath, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

LocalToHdfs: copies the local path src to the HDFS path dst:

public class LocalToHdfs {
    static void localToHdfs(String src, String dst, Configuration conf) {
        Path dstPath = new Path(dst);
        try {
            FileSystem hdfs = dstPath.getFileSystem(conf);
            hdfs.copyFromLocalFile(false, new Path(src), new Path(dst));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

CheckTable (1/2 and 2/2): The check() method, similar to CheckDir, deletes the HBase table if it already exists; the addFamily() method creates the table with the given column family:

public class CheckTable {
    public static Configuration configuration = null;
    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.master", "Host01:60000");
        configuration.set("hbase.zookeeper.quorum", "Host01,Host02");
        configuration.set("hbase.zookeeper.property.clientPort", "2222");
    }

    public static void check(String table) throws Exception {
        HBaseAdmin admin = new HBaseAdmin(configuration);
        if (admin.tableExists(table)) {
            System.out.println("delete the table ");
            admin.disableTable(table);
            admin.deleteTable(table);
        }
    }

    public static void addFamily(String table, String family) throws Exception {
        HBaseAdmin admin = new HBaseAdmin(configuration);
        HTableDescriptor tableDescripter = new HTableDescriptor(table.getBytes());
        tableDescripter.addFamily(new HColumnDescriptor(family));
        admin.createTable(tableDescripter);
    }
}

OutputResult (1/2 and 2/2): reads the per-day maxima produced by myreducer from the HDFS output and, for every day/value pair, asks ScanTable to look up the matching rows in the CPU table:

public class OutputResult {
    static void output(final String path, Configuration conf) {
        Path dst_path = new Path(path);
        String day = null;
        String value = null;
        try {
            FileSystem hdfs = dst_path.getFileSystem(conf);
            FSDataInputStream in = null;
            if (hdfs.exists(dst_path)) {
                in = hdfs.open(new Path(dst_path.toString() + "/part-r-00000"));
                String messagein = null;
                while ((messagein = in.readLine()) != null) {
                    StringTokenizer itr = new StringTokenizer(messagein);
                    day = itr.nextToken();
                    value = itr.nextToken();
                    ScanTable.setFilter("CPU", day, value);
                }
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

ScanTable (1/2 and 2/2): scans the CPU table in HBase for the rows of the given day whose value equals the maximum found by myreducer, and prints them:

public class ScanTable {
    public static Configuration configuration = null;
    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.master", "Host01:60000");
        configuration.set("hbase.zookeeper.quorum", "Host01,Host02");
        configuration.set("hbase.zookeeper.property.clientPort", "2222");
    }

    public static void setFilter(String tablename, String day, String value) throws IOException {
        HTable table = new HTable(configuration, tablename);
        Scan scan = new Scan((day + " 00:00").getBytes(), (day + " 23:00").getBytes());
        FilterList filterList = new FilterList();
        filterList.addFilter(new SingleColumnValueFilter("CPUUtil".getBytes(), "data".getBytes(),
                CompareOp.EQUAL, value.getBytes()));
        scan.setFilter(filterList);
        ResultScanner ResultScannerFilterList = table.getScanner(scan);
        for (Result rs = ResultScannerFilterList.next(); rs != null; rs = ResultScannerFilterList.next()) {
            for (KeyValue kv : rs.list()) {
                System.out.println(new String(kv.getRow()) + " " + new String(kv.getValue()));
            }
        }
    }
}

Program output:

2011/01/01 16:00 100
2011/01/01 17:00 100
2011/01/02 15:00 90
2011/01/03 16:00 80
2011/01/03 17:00 80
2011/01/03 18:00 80
2011/01/04 00:00 40

Hadoop MapReduce Project Topics

Suggested directions for a Hadoop MapReduce project include processing and analysing data with Hadoop MapReduce, storing data on HDFS and adding HDFS nodes, combining HDFS with HBase, and exploring related projects in the Hadoop ecosystem such as Avro, Pig, Hive, and Chukwa.