Slide 5
Slide 5 text
Confidential. © Red Sqirl Analytics
We want to make what is complex…
/**
 * Maps one tab-separated input row to a (rounded location, article name) pair.
 *
 * <p>The row is read as three tab-separated fields: article name, point type, and a geo
 * point made of two space-separated numbers (latitude, then longitude). Only rows whose
 * point type equals {@code GEO_RSS_URI} produce output. The emitted key has the form
 * {@code "(lat,long)"} with both coordinates rounded to the nearest whole number; the
 * emitted value is the URL-decoded article name with underscores replaced by spaces.
 *
 * <p>Rows with fewer than three fields, and geo points that do not contain two parseable
 * numbers, are counted on {@code reporter} and skipped rather than aborting the task with
 * an unchecked exception.
 *
 * @param key input key; not used by this method
 * @param value one input row
 * @param outputCollector receives the (location key, location name) pair; the raw type is
 *     kept so the method signature stays unchanged
 * @param reporter receives counter increments for skipped rows
 * @throws IOException if the collector fails or the UTF-8 decoder is unavailable
 */
public void map(LongWritable key, Text value,
        OutputCollector outputCollector, Reporter reporter)
        throws IOException {
    // The input files are tab separated, so tokenize on tab.
    StringTokenizer fields = new StringTokenizer(value.toString(), "\t");
    if (fields.countTokens() < 3) {
        // Not enough fields to hold name, type and point: skip instead of throwing.
        reporter.incrCounter("GeoRss", "MalformedRows", 1);
        return;
    }
    String articleName = fields.nextToken();
    String pointType = fields.nextToken();
    String geoPoint = fields.nextToken();

    // Only GEO RSS points are of interest; every other point type is ignored.
    if (!GEO_RSS_URI.equals(pointType)) {
        return;
    }

    // The geo point is "<latitude> <longitude>".
    StringTokenizer coordinates = new StringTokenizer(geoPoint, " ");
    if (coordinates.countTokens() < 2) {
        reporter.incrCounter("GeoRss", "MalformedGeoPoints", 1);
        return;
    }

    long roundedLat;
    long roundedLong;
    try {
        roundedLat = Math.round(Double.parseDouble(coordinates.nextToken()));
        roundedLong = Math.round(Double.parseDouble(coordinates.nextToken()));
    } catch (NumberFormatException e) {
        // Non-numeric coordinates: count the row and move on.
        reporter.incrCounter("GeoRss", "MalformedGeoPoints", 1);
        return;
    }

    String locationKey = "(" + roundedLat + "," + roundedLong + ")";
    String locationName = URLDecoder.decode(articleName, "UTF-8").replace("_", " ");

    geoLocationKey.set(locationKey);
    geoLocationName.set(locationName);
    outputCollector.collect(geoLocationKey, geoLocationName);
}
An example of Hadoop MapReduce
5