Wednesday, 21 August 2013

Spring Data - Hadoop connectivity


I'm trying out Spring Data - Hadoop for executing MR code on a remote cluster from my local machine's IDE.
My bean configuration file, viz. applicationContext.xml, is as follows:
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:hdp="http://www.springframework.org/schema/hadoop"
    xmlns:context="http://www.springframework.org/schema/context"
    xsi:schemaLocation="http://www.springframework.org/schema/beans
        http://www.springframework.org/schema/beans/spring-beans.xsd
        http://www.springframework.org/schema/hadoop
        http://www.springframework.org/schema/hadoop/spring-hadoop.xsd
        http://www.springframework.org/schema/context
        http://www.springframework.org/schema/context/spring-context-3.2.xsd">

    <context:property-placeholder location="resources/hadoop.properties" />

    <hdp:configuration>
        fs.default.name=${hd.fs}
    </hdp:configuration>

    <hdp:job id="wc-job" mapper="com.hadoop.basics.WordCounter.WCMapper"
        reducer="com.hadoop.basics.WordCounter.WCReducer"
        input-path="${wordcount.input.path}"
        output-path="${wordcount.output.path}">
    </hdp:job>
</beans>
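Incidentally, as far as I understand the Spring for Apache Hadoop namespace, <hdp:job> only declares the job bean; for the job to actually be submitted when the context starts, a runner has to be declared as well. A sketch (the id is arbitrary):

    <hdp:job-runner id="wc-job-runner" job-ref="wc-job" run-at-startup="true" />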
hadoop.properties
hd.fs=hdfs://cloudx-843-770:9000
wordcount.input.path=/scratchpad/input/Childhood_days.txt
wordcount.output.path=/scratchpad/output
The Java class which I'm launching via 'Run as ...':
package com.hadoop.basics;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class WordCounter {

    private static final IntWritable one = new IntWritable(1);

    // Static, so that Hadoop can instantiate it reflectively via a no-arg
    // constructor; a non-static inner class needs an enclosing instance.
    // The input key is the line's byte offset supplied by TextInputFormat.
    public static class WCMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Emit <token, 1> for every whitespace-separated token in the line
            StringTokenizer strTokenizer = new StringTokenizer(value.toString());
            Text token = new Text();
            while (strTokenizer.hasMoreTokens()) {
                token.set(strTokenizer.nextToken());
                context.write(token, one);
            }
        }
    }

    // Static, for the same reason as WCMapper.
    public static class WCReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // Sum the per-token counts emitted by the mappers
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        AbstractApplicationContext context = new ClassPathXmlApplicationContext(
                "applicationContext.xml", WordCounter.class);
        System.out.println("Word Count Application Running");
        context.registerShutdownHook();
    }
}
Obviously, I get an error on the cluster:

2013-08-21 17:53:57,110 WARN org.apache.hadoop.security.ShellBasedUnixGroupsMapping: got exception trying to get groups for user 298790
org.apache.hadoop.util.Shell$ExitCodeException: id: 298790: No such user
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:255)
    at org.apache.hadoop.util.Shell.run(Shell.java:182)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:375)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:461)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:444)
    at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:68)
    at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:45)
    at org.apache.hadoop.security.Groups.getGroups(Groups.java:79)
    at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1054)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.<init>(FSPermissionChecker.java:50)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:5464)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkTraverse(FSNamesystem.java:5447)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getFileInfo(FSNamesystem.java:2168)
    at org.apache.hadoop.hdfs.server.namenode.NameNode.getFileInfo(NameNode.java:888)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:578)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1393)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1389)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1387)
2013-08-21 17:53:57,113 WARN org.apache.hadoop.security.UserGroupInformation: No groups available for user 298790
2013-08-21 17:53:57,179 WARN org.apache.hadoop.security.ShellBasedUnixGroupsMapping: got exception trying to get groups for user 298790
The cause seems clear enough: the job is submitted under my local Windows user, 298790, and the NameNode's ShellBasedUnixGroupsMapping shells out to id 298790, which fails because no such account exists on the cluster. As per the docs, a resource loader must be used for working as a different user; accordingly, I landed on the HdfsResourceLoader class in the Spring Data API, but a "user" attribute isn't recognized by the JobFactoryBean.
How should I proceed?
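One direction I'm considering is to bootstrap the Spring context, and hence the job submission, inside a doAs() block of Hadoop's UserGroupInformation. A minimal sketch, untested, assuming simple (non-Kerberos) authentication and a hypothetical account "hduser" that actually exists on the cluster:

    package com.hadoop.basics;

    import java.security.PrivilegedExceptionAction;

    import org.apache.hadoop.security.UserGroupInformation;
    import org.springframework.context.support.AbstractApplicationContext;
    import org.springframework.context.support.ClassPathXmlApplicationContext;

    public class WordCounterAsRemoteUser {

        public static void main(String[] args) throws Exception {
            // "hduser" is a placeholder; it must be a real Unix account on the cluster
            UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser("hduser");
            remoteUser.doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() {
                    // Everything in here, including the job submission triggered
                    // by the Spring context, now carries the "hduser" identity
                    AbstractApplicationContext context = new ClassPathXmlApplicationContext(
                            "applicationContext.xml", WordCounter.class);
                    context.registerShutdownHook();
                    return null;
                }
            });
        }
    }

I've also read that, with simple authentication, some Hadoop versions pick up a HADOOP_USER_NAME environment variable, which would avoid code changes altogether, but I haven't verified that against this cluster's version.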
