// 使用 Hadoop 來計算平均數，當成 Hadoop 一個簡單的程式範例
// (A simple Hadoop MapReduce example program that computes an average.)
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import java.io.IOException;
import java.util.Iterator;
public class Average {
static class AverageMapper extends MapReduceBase implements Mapper
{
@Override
public void map(LongWritable longWritable, Text value, OutputCollector
output, Reporter reporter) throws IOException {
String lines = value.toString();
int data = Integer.parseInt(lines);
output.collect(new Text("Average"), new IntWritable(data));
}
}
static class AverageReduce extends MapReduceBase implements Reducer
{
@Override
public void reduce(Text key, Iterator
output, Reporter reporter) throws IOException {
long sum = 0;
long count = 0;
while(values.hasNext())
{
sum += values.next().get();
count ++;
}
double average = (double)sum / count;
output.collect(key, new DoubleWritable(average));
}
}
public static void main(String[] args) throws Exception{
if(args.length != 2)
{
System.err.println("Usage: Average
System.exit(2);
}
JobConf conf = new JobConf(Average.class);
conf.setJobName("Average");
FileInputFormat.addInputPath(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(AverageMapper.class);
conf.setReducerClass(AverageReduce.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
}
}
// Blog-scrape residue (not part of the program):
// 沒有留言:
// 張貼留言