001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.action.hadoop; 016 017 import java.io.BufferedReader; 018 import java.io.File; 019 import java.io.FileNotFoundException; 020 import java.io.IOException; 021 import java.io.InputStream; 022 import java.io.InputStreamReader; 023 import java.io.StringReader; 024 import java.net.ConnectException; 025 import java.net.URI; 026 import java.net.UnknownHostException; 027 import java.util.ArrayList; 028 import java.util.HashMap; 029 import java.util.HashSet; 030 import java.util.List; 031 import java.util.Map; 032 import java.util.Properties; 033 import java.util.Set; 034 import java.util.Map.Entry; 035 036 import org.apache.hadoop.conf.Configuration; 037 import org.apache.hadoop.filecache.DistributedCache; 038 import org.apache.hadoop.fs.FileSystem; 039 import org.apache.hadoop.fs.Path; 040 import org.apache.hadoop.fs.permission.AccessControlException; 041 import org.apache.hadoop.mapred.JobClient; 042 import org.apache.hadoop.mapred.JobConf; 043 import org.apache.hadoop.mapred.JobID; 044 import org.apache.hadoop.mapred.RunningJob; 045 import org.apache.hadoop.util.DiskChecker; 046 import org.apache.oozie.WorkflowJobBean; 047 import org.apache.oozie.action.ActionExecutor; 048 import org.apache.oozie.action.ActionExecutorException; 049 import org.apache.oozie.client.OozieClient; 050 import org.apache.oozie.client.WorkflowAction; 051 import org.apache.oozie.service.HadoopAccessorException; 052 import org.apache.oozie.service.HadoopAccessorService; 053 import org.apache.oozie.service.Services; 054 import org.apache.oozie.service.WorkflowAppService; 055 import org.apache.oozie.servlet.CallbackServlet; 056 import org.apache.oozie.util.IOUtils; 057 import org.apache.oozie.util.PropertiesUtils; 058 import org.apache.oozie.util.XConfiguration; 059 import org.apache.oozie.util.XLog; 060 import org.apache.oozie.util.XmlUtils; 061 import org.jdom.Element; 062 import org.jdom.JDOMException; 063 import org.jdom.Namespace; 064 import org.apache.hadoop.security.token.Token; 065 import org.apache.hadoop.security.token.TokenIdentifier; 066 067 public class JavaActionExecutor extends ActionExecutor { 068 069 private static final String HADOOP_USER = "user.name"; 070 private static final String HADOOP_UGI = "hadoop.job.ugi"; 071 private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker"; 072 private static final String HADOOP_NAME_NODE = "fs.default.name"; 073 074 private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>(); 075 076 private static int maxActionOutputLen; 077 078 private static final String SUCCEEDED = "SUCCEEDED"; 079 private static final String KILLED = "KILLED"; 080 private static final String FAILED = "FAILED"; 081 private static final String FAILED_KILLED = "FAILED/KILLED"; 082 private static final String RUNNING = "RUNNING"; 083 private XLog log = XLog.getLog(getClass()); 084 085 static { 086 DISALLOWED_PROPERTIES.add(HADOOP_USER); 087 DISALLOWED_PROPERTIES.add(HADOOP_UGI); 088 DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER); 089 DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE); 090 DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME); 091 DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME); 092 } 093 094 public JavaActionExecutor() { 095 this("java"); 096 } 097 098 protected JavaActionExecutor(String type) { 099 super(type); 100 } 101 102 protected String getLauncherJarName() { 103 return getType() + "-launcher.jar"; 104 } 105 106 protected List<Class> getLauncherClasses() { 107 List<Class> classes = new ArrayList<Class>(); 108 classes.add(LauncherMapper.class); 109 classes.add(LauncherSecurityManager.class); 110 classes.add(LauncherException.class); 111 classes.add(LauncherMainException.class); 112 return classes; 113 } 114 115 @Override 116 public void initActionType() { 117 super.initActionType(); 118 maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024); 119 try { 120 List<Class> classes = getLauncherClasses(); 121 Class[] launcherClasses = classes.toArray(new Class[classes.size()]); 122 IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses); 123 124 registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001"); 125 registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT, 126 "JA002"); 127 registerError(DiskChecker.DiskOutOfSpaceException.class.getName(), 128 ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003"); 129 registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(), 130 ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004"); 131 registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(), 132 ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005"); 133 registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006"); 134 registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007"); 135 registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008"); 136 registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009"); 137 } 138 catch (IOException ex) { 139 throw new RuntimeException(ex); 140 } 141 } 142 143 void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException { 144 for (String prop : DISALLOWED_PROPERTIES) { 145 if (conf.get(prop) != null) { 146 throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010", 147 "Property [{0}] not allowed in action [{1}] configuration", prop, confName); 148 } 149 } 150 } 151 152 public Configuration createBaseHadoopConf(Context context, Element actionXml) { 153 Configuration conf = new XConfiguration(); 154 conf.set(HADOOP_USER, context.getProtoActionConf().get(WorkflowAppService.HADOOP_USER)); 155 conf.set(HADOOP_UGI, context.getProtoActionConf().get(WorkflowAppService.HADOOP_UGI)); 156 if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) { 157 conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME, context.getProtoActionConf().get( 158 WorkflowAppService.HADOOP_JT_KERBEROS_NAME)); 159 } 160 if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) { 161 conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME, context.getProtoActionConf().get( 162 WorkflowAppService.HADOOP_NN_KERBEROS_NAME)); 163 } 164 conf.set(OozieClient.GROUP_NAME, context.getProtoActionConf().get(OozieClient.GROUP_NAME)); 165 Namespace ns = actionXml.getNamespace(); 166 String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim(); 167 String nameNode = actionXml.getChild("name-node", ns).getTextTrim(); 168 conf.set(HADOOP_JOB_TRACKER, jobTracker); 169 conf.set(HADOOP_NAME_NODE, nameNode); 170 conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true"); 171 return conf; 172 } 173 174 Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context) 175 throws ActionExecutorException { 176 try { 177 Namespace ns = actionXml.getNamespace(); 178 Element e = actionXml.getChild("configuration", ns); 179 if (e != null) { 180 String strConf = XmlUtils.prettyPrint(e).toString(); 181 XConfiguration inlineConf = new XConfiguration(new StringReader(strConf)); 182 183 XConfiguration launcherConf = new XConfiguration(); 184 for (Map.Entry<String, String> entry : inlineConf) { 185 if (entry.getKey().startsWith("oozie.launcher.")) { 186 String name = entry.getKey().substring("oozie.launcher.".length()); 187 String value = entry.getValue(); 188 // setting original KEY 189 launcherConf.set(entry.getKey(), value); 190 // setting un-prefixed key (to allow Hadoop job config 191 // for the launcher job 192 launcherConf.set(name, value); 193 } 194 } 195 checkForDisallowedProps(launcherConf, "inline launcher configuration"); 196 XConfiguration.copy(launcherConf, conf); 197 } 198 return conf; 199 } 200 catch (IOException ex) { 201 throw convertException(ex); 202 } 203 } 204 205 protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException { 206 try { 207 Element actionXml = XmlUtils.parseXml(action.getConf()); 208 return getActionFileSystem(context, actionXml); 209 } 210 catch (JDOMException ex) { 211 throw convertException(ex); 212 } 213 } 214 215 protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException { 216 try { 217 return context.getAppFileSystem(); 218 } 219 catch (Exception ex) { 220 throw convertException(ex); 221 } 222 } 223 224 Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath) 225 throws ActionExecutorException { 226 try { 227 Namespace ns = actionXml.getNamespace(); 228 Element e = actionXml.getChild("job-xml", ns); 229 if (e != null) { 230 String jobXml = e.getTextTrim(); 231 Path path = new Path(appPath, jobXml); 232 FileSystem fs = getActionFileSystem(context, actionXml); 233 Configuration jobXmlConf = new XConfiguration(fs.open(path)); 234 checkForDisallowedProps(jobXmlConf, "job-xml"); 235 XConfiguration.copy(jobXmlConf, actionConf); 236 } 237 e = actionXml.getChild("configuration", ns); 238 if (e != null) { 239 String strConf = XmlUtils.prettyPrint(e).toString(); 240 XConfiguration inlineConf = new XConfiguration(new StringReader(strConf)); 241 checkForDisallowedProps(inlineConf, "inline configuration"); 242 XConfiguration.copy(inlineConf, actionConf); 243 } 244 return actionConf; 245 } 246 catch (IOException ex) { 247 throw convertException(ex); 248 } 249 } 250 251 Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive) 252 throws ActionExecutorException { 253 Path path = null; 254 try { 255 if (filePath.startsWith("/")) { 256 path = new Path(filePath); 257 } 258 else { 259 path = new Path(appPath, filePath); 260 } 261 URI uri = new URI(path.toUri().getPath()); 262 if (archive) { 263 DistributedCache.addCacheArchive(uri, conf); 264 } 265 else { 266 String fileName = filePath.substring(filePath.lastIndexOf("/") + 1); 267 if (fileName.endsWith(".so") || fileName.contains(".so.")) { // .so files 268 uri = new Path(path.toString() + "#" + fileName).toUri(); 269 uri = new URI(uri.getPath()); 270 DistributedCache.addCacheFile(uri, conf); 271 } 272 else if (fileName.endsWith(".jar")) { // .jar files 273 if (!fileName.contains("#")) { 274 path = new Path(uri.toString()); 275 276 String user = conf.get("user.name"); 277 String group = conf.get("group.name"); 278 Services.get().get(HadoopAccessorService.class).addFileToClassPath(user, group, path, conf); 279 } 280 else { 281 DistributedCache.addCacheFile(uri, conf); 282 } 283 } 284 else { // regular files 285 if (!fileName.contains("#")) { 286 uri = new Path(path.toString() + "#" + fileName).toUri(); 287 uri = new URI(uri.getPath()); 288 } 289 DistributedCache.addCacheFile(uri, conf); 290 } 291 } 292 DistributedCache.createSymlink(conf); 293 return conf; 294 } 295 catch (Exception ex) { 296 XLog.getLog(getClass()).debug( 297 "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf=" 298 + XmlUtils.prettyPrint(conf).toString()); 299 throw convertException(ex); 300 } 301 } 302 303 String getOozieLauncherJar(Context context) throws ActionExecutorException { 304 try { 305 return new Path(context.getActionDir(), getLauncherJarName()).toString(); 306 } 307 catch (Exception ex) { 308 throw convertException(ex); 309 } 310 } 311 312 public void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException { 313 try { 314 Path actionDir = context.getActionDir(); 315 Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp"); 316 if (!actionFs.exists(actionDir)) { 317 try { 318 actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path( 319 tempActionDir, getLauncherJarName())); 320 actionFs.rename(tempActionDir, actionDir); 321 } 322 catch (IOException ex) { 323 actionFs.delete(tempActionDir, true); 324 actionFs.delete(actionDir, true); 325 throw ex; 326 } 327 } 328 } 329 catch (Exception ex) { 330 throw convertException(ex); 331 } 332 } 333 334 void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException { 335 try { 336 Path actionDir = context.getActionDir(); 337 if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false) 338 && actionFs.exists(actionDir)) { 339 actionFs.delete(actionDir, true); 340 } 341 } 342 catch (Exception ex) { 343 throw convertException(ex); 344 } 345 } 346 347 @SuppressWarnings("unchecked") 348 void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf) 349 throws ActionExecutorException { 350 Configuration proto = context.getProtoActionConf(); 351 352 addToCache(conf, appPath, getOozieLauncherJar(context), false); 353 354 String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST); 355 if (paths != null) { 356 for (String path : paths) { 357 addToCache(conf, appPath, path, false); 358 } 359 } 360 361 for (Element eProp : (List<Element>) actionXml.getChildren()) { 362 if (eProp.getName().equals("file")) { 363 String path = eProp.getTextTrim(); 364 addToCache(conf, appPath, path, false); 365 } 366 else { 367 if (eProp.getName().equals("archive")) { 368 String path = eProp.getTextTrim(); 369 addToCache(conf, appPath, path, true); 370 } 371 } 372 } 373 } 374 375 protected String getLauncherMain(Configuration launcherConf, Element actionXml) { 376 Namespace ns = actionXml.getNamespace(); 377 Element e = actionXml.getChild("main-class", ns); 378 return e.getTextTrim(); 379 } 380 381 private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>(); 382 383 static { 384 SPECIAL_PROPERTIES.add("mapred.job.queue.name"); 385 SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal"); 386 SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal"); 387 } 388 389 @SuppressWarnings("unchecked") 390 JobConf createLauncherConf(FileSystem actionFs, Context context, WorkflowAction action, Element actionXml, Configuration actionConf) 391 throws ActionExecutorException { 392 try { 393 394 // app path could be a file 395 Path appPathRoot = new Path(context.getWorkflow().getAppPath()); 396 if (actionFs.isFile(appPathRoot)) { 397 appPathRoot = appPathRoot.getParent(); 398 } 399 400 // launcher job configuration 401 Configuration launcherConf = createBaseHadoopConf(context, actionXml); 402 setupLauncherConf(launcherConf, actionXml, appPathRoot, context); 403 404 // we are doing init+copy because if not we are getting 'hdfs' 405 // scheme not known 406 // its seems that new JobConf(Conf) does not load defaults, it 407 // assumes parameter Conf does. 408 JobConf launcherJobConf = new JobConf(); 409 XConfiguration.copy(launcherConf, launcherJobConf); 410 setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf); 411 String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow() 412 .getAppName(), action.getName(), context.getWorkflow().getId()); 413 launcherJobConf.setJobName(jobName); 414 415 String jobId = context.getWorkflow().getId(); 416 String actionId = action.getId(); 417 Path actionDir = context.getActionDir(); 418 String recoveryId = context.getRecoveryId(); 419 420 LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf); 421 422 LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml)); 423 424 LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen); 425 426 Namespace ns = actionXml.getNamespace(); 427 List<Element> list = actionXml.getChildren("arg", ns); 428 String[] args = new String[list.size()]; 429 for (int i = 0; i < list.size(); i++) { 430 args[i] = list.get(i).getTextTrim(); 431 } 432 LauncherMapper.setupMainArguments(launcherJobConf, args); 433 434 Element opt = actionXml.getChild("java-opts", ns); 435 if (opt != null) { 436 String opts = launcherConf.get("mapred.child.java.opts", ""); 437 opts = opts + " " + opt.getTextTrim(); 438 opts = opts.trim(); 439 launcherJobConf.set("mapred.child.java.opts", opts); 440 } 441 442 // properties from action that are needed by the launcher (QUEUE NAME) 443 // maybe we should add queue to the WF schema, below job-tracker 444 for (String name : SPECIAL_PROPERTIES) { 445 String value = actionConf.get(name); 446 if (value != null) { 447 launcherJobConf.set(name, value); 448 } 449 } 450 451 // to disable cancelation of delegation token on launcher job end 452 launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false); 453 454 // setting the group owning the Oozie job to allow anybody in that 455 // group to kill the jobs. 456 launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup()); 457 458 return launcherJobConf; 459 } 460 catch (Exception ex) { 461 throw convertException(ex); 462 } 463 } 464 465 private void injectCallback(Context context, Configuration conf) { 466 String callback = context.getCallbackUrl("$jobStatus"); 467 if (conf.get("job.end.notification.url") != null) { 468 XLog.getLog(getClass()).warn("Overriding the action job end notification URI"); 469 } 470 conf.set("job.end.notification.url", callback); 471 } 472 473 void injectActionCallback(Context context, Configuration actionConf) { 474 injectCallback(context, actionConf); 475 } 476 477 void injectLauncherCallback(Context context, Configuration launcherConf) { 478 injectCallback(context, launcherConf); 479 } 480 481 public void submitLauncher(FileSystem actionFs, Context context, WorkflowAction action) throws ActionExecutorException { 482 JobClient jobClient = null; 483 boolean exception = false; 484 try { 485 Path appPathRoot = new Path(context.getWorkflow().getAppPath()); 486 487 // app path could be a file 488 if (actionFs.isFile(appPathRoot)) { 489 appPathRoot = appPathRoot.getParent(); 490 } 491 492 Element actionXml = XmlUtils.parseXml(action.getConf()); 493 494 // action job configuration 495 Configuration actionConf = createBaseHadoopConf(context, actionXml); 496 setupActionConf(actionConf, context, actionXml, appPathRoot); 497 XLog.getLog(getClass()).debug("Setting LibFilesArchives "); 498 setLibFilesArchives(context, actionXml, appPathRoot, actionConf); 499 String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow() 500 .getAppName(), action.getName(), context.getWorkflow().getId()); 501 actionConf.set("mapred.job.name", jobName); 502 injectActionCallback(context, actionConf); 503 504 // setting the group owning the Oozie job to allow anybody in that 505 // group to kill the jobs. 506 actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup()); 507 508 // Setting the credential properties in launcher conf 509 HashMap<String, CredentialsProperties> credentialsProperties = setCredentialPropertyToActionConf(context, 510 action, actionConf); 511 512 // Adding if action need to set more credential tokens 513 JobConf credentialsConf = new JobConf(false); 514 XConfiguration.copy(actionConf, credentialsConf); 515 setCredentialTokens(credentialsConf, context, action, credentialsProperties); 516 517 // insert conf to action conf from credentialsConf 518 for (Entry<String, String> entry : credentialsConf) { 519 if (actionConf.get(entry.getKey()) == null) { 520 actionConf.set(entry.getKey(), entry.getValue()); 521 } 522 } 523 524 JobConf launcherJobConf = createLauncherConf(actionFs, context, action, actionXml, actionConf); 525 injectLauncherCallback(context, launcherJobConf); 526 XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId()); 527 jobClient = createJobClient(context, launcherJobConf); 528 String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context 529 .getRecoveryId()); 530 boolean alreadyRunning = launcherId != null; 531 RunningJob runningJob; 532 533 if (alreadyRunning) { 534 runningJob = jobClient.getJob(JobID.forName(launcherId)); 535 if (runningJob == null) { 536 String jobTracker = launcherJobConf.get("mapred.job.tracker"); 537 throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017", 538 "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker); 539 } 540 } 541 else { 542 prepare(context, actionXml); 543 XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId()); 544 545 // setting up propagation of the delegation token. 546 AuthHelper.get().set(jobClient, launcherJobConf); 547 log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = " 548 + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME)); 549 log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = " 550 + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME)); 551 552 // insert credentials tokens to launcher job conf if needed 553 if (needInjectCredentials()) { 554 for (Token<? extends TokenIdentifier> tk : credentialsConf.getCredentials().getAllTokens()) { 555 log.debug("ADDING TOKEN: " + tk.getKind().toString()); 556 launcherJobConf.getCredentials().addToken(tk.getKind(), tk); 557 } 558 } 559 else { 560 log.info("No need to inject credentials."); 561 } 562 runningJob = jobClient.submitJob(launcherJobConf); 563 if (runningJob == null) { 564 throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017", 565 "Error submitting launcher for action [{0}]", action.getId()); 566 } 567 launcherId = runningJob.getID().toString(); 568 XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId); 569 } 570 571 String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER); 572 String consoleUrl = runningJob.getTrackingURL(); 573 context.setStartData(launcherId, jobTracker, consoleUrl); 574 } 575 catch (Exception ex) { 576 exception = true; 577 throw convertException(ex); 578 } 579 finally { 580 if (jobClient != null) { 581 try { 582 jobClient.close(); 583 } 584 catch (Exception e) { 585 if (exception) { 586 log.error("JobClient error: ", e); 587 } 588 else { 589 throw convertException(e); 590 } 591 } 592 } 593 } 594 } 595 596 private boolean needInjectCredentials() { 597 boolean methodExists = true; 598 599 Class klass; 600 try { 601 klass = Class.forName("org.apache.hadoop.mapred.JobConf"); 602 klass.getMethod("getCredentials"); 603 } 604 catch (ClassNotFoundException ex) { 605 methodExists = false; 606 } 607 catch (NoSuchMethodException ex) { 608 methodExists = false; 609 } 610 611 return methodExists; 612 } 613 614 protected HashMap<String, CredentialsProperties> setCredentialPropertyToActionConf(Context context, 615 WorkflowAction action, Configuration actionConf) throws Exception { 616 HashMap<String, CredentialsProperties> credPropertiesMap = null; 617 if (context != null && action != null) { 618 credPropertiesMap = getActionCredentialsProperties(context, action); 619 if (credPropertiesMap != null) { 620 for (String key : credPropertiesMap.keySet()) { 621 CredentialsProperties prop = credPropertiesMap.get(key); 622 if (prop != null) { 623 log.debug("Credential Properties set for action : " + action.getId()); 624 for (String property : prop.getProperties().keySet()) { 625 actionConf.set(property, prop.getProperties().get(property)); 626 log.debug("property : '" + property + "', value : '" + prop.getProperties().get(property) + "'"); 627 } 628 } 629 } 630 } 631 else { 632 log.warn("No credential properties found for action : " + action.getId() + ", cred : " + action.getCred()); 633 } 634 } 635 else { 636 log.warn("context or action is null"); 637 } 638 return credPropertiesMap; 639 } 640 641 protected void setCredentialTokens(JobConf jobconf, Context context, WorkflowAction action, 642 HashMap<String, CredentialsProperties> credPropertiesMap) throws Exception { 643 644 if (context != null && action != null && credPropertiesMap != null) { 645 for (Entry<String, CredentialsProperties> entry : credPropertiesMap.entrySet()) { 646 String credName = entry.getKey(); 647 CredentialsProperties credProps = entry.getValue(); 648 if (credProps != null) { 649 CredentialsProvider credProvider = new CredentialsProvider(credProps.getType()); 650 Credentials credentialObject = credProvider.createCredentialObject(); 651 if (credentialObject != null) { 652 credentialObject.addtoJobConf(jobconf, credProps, context); 653 log.debug("Retrieved Credential '" + credName + "' for action " + action.getId()); 654 } 655 else { 656 log.debug("Credentials object is null for name= " + credName + ", type=" + credProps.getType()); 657 } 658 } 659 else { 660 log.warn("Could not find credentials properties for: " + credName); 661 } 662 } 663 } 664 665 } 666 667 protected HashMap<String, CredentialsProperties> getActionCredentialsProperties(Context context, 668 WorkflowAction action) throws Exception { 669 HashMap<String, CredentialsProperties> props = new HashMap<String, CredentialsProperties>(); 670 if (context != null && action != null) { 671 String credsInAction = action.getCred(); 672 log.debug("Get credential '" + credsInAction + "' properties for action : " + action.getId()); 673 String[] credNames = credsInAction.split(","); 674 for (String credName : credNames) { 675 CredentialsProperties credProps = getCredProperties(context, credName); 676 props.put(credName, credProps); 677 } 678 } 679 else { 680 log.warn("context or action is null"); 681 } 682 return props; 683 } 684 685 @SuppressWarnings("unchecked") 686 protected CredentialsProperties getCredProperties(Context context, String credName) 687 throws Exception { 688 CredentialsProperties credProp = null; 689 String workflowXml = ((WorkflowJobBean) context.getWorkflow()).getWorkflowInstance().getApp().getDefinition(); 690 Element elementJob = XmlUtils.parseXml(workflowXml); 691 Element credentials = elementJob.getChild("credentials", elementJob.getNamespace()); 692 if (credentials != null) { 693 for (Element credential : (List<Element>) credentials.getChildren("credential", credentials 694 .getNamespace())) { 695 String name = credential.getAttributeValue("name"); 696 String type = credential.getAttributeValue("type"); 697 log.debug("getCredProperties: Name: " + name + ", Type: " + type); 698 if (name.equalsIgnoreCase(credName)) { 699 credProp = new CredentialsProperties(name, type); 700 for (Element property : (List<Element>) credential.getChildren("property", credential 701 .getNamespace())) { 702 credProp.getProperties().put(property.getChildText("name", property.getNamespace()), 703 property.getChildText("value", property.getNamespace())); 704 log.debug("getCredProperties: Properties name :'" 705 + property.getChildText("name", property.getNamespace()) + "', Value : '" 706 + property.getChildText("value", property.getNamespace()) + "'"); 707 } 708 } 709 } 710 } 711 else { 712 log.warn("credentials is null for the action"); 713 } 714 return credProp; 715 } 716 717 void prepare(Context context, Element actionXml) throws ActionExecutorException { 718 Namespace ns = actionXml.getNamespace(); 719 Element prepare = actionXml.getChild("prepare", ns); 720 if (prepare != null) { 721 XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation"); 722 FsActionExecutor fsAe = new FsActionExecutor(); 723 fsAe.doOperations(context, prepare); 724 XLog.getLog(getClass()).debug("FS Operation is completed"); 725 } 726 } 727 728 @Override 729 public void start(Context context, WorkflowAction action) throws ActionExecutorException { 730 try { 731 XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System"); 732 FileSystem actionFs = getActionFileSystem(context, action); 733 XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir()); 734 prepareActionDir(actionFs, context); 735 XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action "); 736 submitLauncher(actionFs, context, action); 737 XLog.getLog(getClass()).debug("Action submit completed. Performing check "); 738 check(context, action); 739 XLog.getLog(getClass()).debug("Action check is done after submission"); 740 } 741 catch (Exception ex) { 742 throw convertException(ex); 743 } 744 } 745 746 @Override 747 public void end(Context context, WorkflowAction action) throws ActionExecutorException { 748 try { 749 String externalStatus = action.getExternalStatus(); 750 WorkflowAction.Status status = externalStatus.equals(SUCCEEDED) ? WorkflowAction.Status.OK 751 : WorkflowAction.Status.ERROR; 752 context.setEndData(status, getActionSignal(status)); 753 } 754 catch (Exception ex) { 755 throw convertException(ex); 756 } 757 finally { 758 try { 759 FileSystem actionFs = getActionFileSystem(context, action); 760 cleanUpActionDir(actionFs, context); 761 } 762 catch (Exception ex) { 763 throw convertException(ex); 764 } 765 } 766 } 767 768 /** 769 * Create job client object 770 * 771 * @param context 772 * @param jobConf 773 * @return 774 * @throws HadoopAccessorException 775 */ 776 protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException { 777 String user = context.getWorkflow().getUser(); 778 String group = context.getWorkflow().getGroup(); 779 return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf); 780 } 781 782 @Override 783 public void check(Context context, WorkflowAction action) throws ActionExecutorException { 784 JobClient jobClient = null; 785 boolean exception = false; 786 try { 787 Element actionXml = XmlUtils.parseXml(action.getConf()); 788 FileSystem actionFs = getActionFileSystem(context, actionXml); 789 Configuration conf = createBaseHadoopConf(context, actionXml); 790 JobConf jobConf = new JobConf(); 791 XConfiguration.copy(conf, jobConf); 792 jobClient = createJobClient(context, jobConf); 793 RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId())); 794 if (runningJob == null) { 795 context.setExternalStatus(FAILED); 796 context.setExecutionData(FAILED, null); 797 throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017", 798 "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", action 799 .getExternalId(), action.getId()); 800 } 801 if (runningJob.isComplete()) { 802 Path actionDir = context.getActionDir(); 803 804 String user = context.getWorkflow().getUser(); 805 String group = context.getWorkflow().getGroup(); 806 if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) { 807 String launcherId = action.getExternalId(); 808 Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir()); 809 InputStream is = actionFs.open(idSwapPath); 810 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 811 Properties props = PropertiesUtils.readProperties(reader, maxActionOutputLen); 812 reader.close(); 813 String newId = props.getProperty("id"); 814 runningJob = jobClient.getJob(JobID.forName(newId)); 815 if (runningJob == null) { 816 context.setExternalStatus(FAILED); 817 throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017", 818 "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", newId, 819 action.getId()); 820 } 821 822 context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL()); 823 XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId, 824 newId); 825 } 826 if (runningJob.isComplete()) { 827 XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]", 828 action.getExternalId()); 829 if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) { 830 Properties props = null; 831 if (getCaptureOutput(action)) { 832 props = new Properties(); 833 if (LauncherMapper.hasOutputData(runningJob)) { 834 Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir()); 835 InputStream is = actionFs.open(actionOutput); 836 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 837 props = PropertiesUtils.readProperties(reader, maxActionOutputLen); 838 reader.close(); 839 } 840 } 841 context.setExecutionData(SUCCEEDED, props); 842 XLog.getLog(getClass()).info(XLog.STD, "action produced output"); 843 } 844 else { 845 XLog log = XLog.getLog(getClass()); 846 String errorReason; 847 Path actionError = LauncherMapper.getErrorPath(context.getActionDir()); 848 if (actionFs.exists(actionError)) { 849 InputStream is = actionFs.open(actionError); 850 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 851 Properties props = PropertiesUtils.readProperties(reader, -1); 852 reader.close(); 853 String errorCode = props.getProperty("error.code"); 854 if (errorCode.equals("0")) { 855 errorCode = "JA018"; 856 } 857 errorReason = props.getProperty("error.reason"); 858 log.warn("Launcher ERROR, reason: {0}", errorReason); 859 String exMsg = props.getProperty("exception.message"); 860 String errorInfo = (exMsg != null) ? exMsg : errorReason; 861 context.setErrorInfo(errorCode, errorInfo); 862 String exStackTrace = props.getProperty("exception.stacktrace"); 863 if (exMsg != null) { 864 log.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace); 865 } 866 } 867 else { 868 errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action 869 .getTrackerUri(), action.getExternalId()); 870 log.warn(errorReason); 871 } 872 context.setExecutionData(FAILED_KILLED, null); 873 } 874 } 875 else { 876 context.setExternalStatus(RUNNING); 877 XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]", 878 action.getExternalId(), action.getExternalStatus()); 879 } 880 } 881 else { 882 context.setExternalStatus(RUNNING); 883 XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]", 884 action.getExternalId(), action.getExternalStatus()); 885 } 886 } 887 catch (Exception ex) { 888 XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex); 889 exception = true; 890 throw convertException(ex); 891 } 892 finally { 893 if (jobClient != null) { 894 try { 895 jobClient.close(); 896 } 897 catch (Exception e) { 898 if (exception) { 899 log.error("JobClient error: ", e); 900 } 901 else { 902 throw convertException(e); 903 } 904 } 905 } 906 } 907 } 908 909 protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException { 910 Element eConf = XmlUtils.parseXml(action.getConf()); 911 Namespace ns = eConf.getNamespace(); 912 Element captureOutput = eConf.getChild("capture-output", ns); 913 return captureOutput != null; 914 } 915 916 @Override 917 public void kill(Context context, WorkflowAction action) throws ActionExecutorException { 918 JobClient jobClient = null; 919 boolean exception = false; 920 try { 921 Element actionXml = XmlUtils.parseXml(action.getConf()); 922 Configuration conf = createBaseHadoopConf(context, actionXml); 923 JobConf jobConf = new JobConf(); 924 XConfiguration.copy(conf, jobConf); 925 jobClient = createJobClient(context, jobConf); 926 RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId())); 927 if (runningJob != null) { 928 runningJob.killJob(); 929 } 930 context.setExternalStatus(KILLED); 931 context.setExecutionData(KILLED, null); 932 } 933 catch (Exception ex) { 934 exception = true; 935 throw convertException(ex); 936 } 937 finally { 938 try { 939 FileSystem actionFs = getActionFileSystem(context, action); 940 cleanUpActionDir(actionFs, context); 941 if (jobClient != null) { 942 jobClient.close(); 943 } 944 } 945 catch (Exception ex) { 946 if (exception) { 947 log.error("Error: ", ex); 948 } 949 else { 950 throw convertException(ex); 951 } 952 } 953 } 954 } 955 956 private static Set<String> FINAL_STATUS = new HashSet<String>(); 957 958 static { 959 FINAL_STATUS.add(SUCCEEDED); 960 FINAL_STATUS.add(KILLED); 961 FINAL_STATUS.add(FAILED); 962 FINAL_STATUS.add(FAILED_KILLED); 963 } 964 965 @Override 966 public boolean isCompleted(String externalStatus) { 967 return FINAL_STATUS.contains(externalStatus); 968 } 969 970 }