001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie; 016 017 import org.apache.oozie.util.XLogStreamer; 018 import org.apache.oozie.service.XLogService; 019 import org.apache.oozie.service.DagXLogInfoService; 020 import org.apache.hadoop.conf.Configuration; 021 import org.apache.oozie.client.CoordinatorJob; 022 import org.apache.oozie.client.WorkflowJob; 023 import org.apache.oozie.client.OozieClient; 024 import org.apache.oozie.command.CommandException; 025 import org.apache.oozie.command.XCommand; 026 import org.apache.oozie.command.wf.CompletedActionCommand; 027 import org.apache.oozie.command.wf.CompletedActionXCommand; 028 import org.apache.oozie.command.wf.DefinitionCommand; 029 import org.apache.oozie.command.wf.DefinitionXCommand; 030 import org.apache.oozie.command.wf.ExternalIdCommand; 031 import org.apache.oozie.command.wf.ExternalIdXCommand; 032 import org.apache.oozie.command.wf.JobCommand; 033 import org.apache.oozie.command.wf.JobXCommand; 034 import org.apache.oozie.command.wf.JobsCommand; 035 import org.apache.oozie.command.wf.JobsXCommand; 036 import org.apache.oozie.command.wf.KillCommand; 037 import org.apache.oozie.command.wf.KillXCommand; 038 import org.apache.oozie.command.wf.ReRunCommand; 039 import org.apache.oozie.command.wf.ReRunXCommand; 040 import org.apache.oozie.command.wf.ResumeCommand; 041 import org.apache.oozie.command.wf.ResumeXCommand; 042 import org.apache.oozie.command.wf.StartCommand; 043 import org.apache.oozie.command.wf.StartXCommand; 044 import org.apache.oozie.command.wf.SubmitCommand; 045 import org.apache.oozie.command.wf.SubmitHttpCommand; 046 import org.apache.oozie.command.wf.SubmitHttpXCommand; 047 import org.apache.oozie.command.wf.SubmitMRCommand; 048 import org.apache.oozie.command.wf.SubmitMRXCommand; 049 import org.apache.oozie.command.wf.SubmitPigCommand; 050 import org.apache.oozie.command.wf.SubmitPigXCommand; 051 import org.apache.oozie.command.wf.SubmitXCommand; 052 import org.apache.oozie.command.wf.SuspendCommand; 053 import org.apache.oozie.command.wf.SuspendXCommand; 054 import org.apache.oozie.command.wf.WorkflowActionInfoCommand; 055 import org.apache.oozie.command.wf.WorkflowActionInfoXCommand; 056 import org.apache.oozie.service.Services; 057 import org.apache.oozie.service.CallableQueueService; 058 import org.apache.oozie.util.ParamChecker; 059 import org.apache.oozie.util.XCallable; 060 import org.apache.oozie.util.XLog; 061 062 import java.io.Writer; 063 import java.util.Date; 064 import java.util.List; 065 import java.util.Properties; 066 import java.util.Set; 067 import java.util.HashSet; 068 import java.util.StringTokenizer; 069 import java.util.Map; 070 import java.util.HashMap; 071 import java.util.ArrayList; 072 import java.io.IOException; 073 074 /** 075 * The DagEngine provides all the DAG engine functionality for WS calls. 076 */ 077 public class DagEngine extends BaseEngine { 078 079 private static final int HIGH_PRIORITY = 2; 080 private boolean useXCommand = true; 081 private static XLog LOG = XLog.getLog(DagEngine.class); 082 083 /** 084 * Create a system Dag engine, with no user and no group. 085 */ 086 public DagEngine() { 087 if (Services.get().getConf().getBoolean(USE_XCOMMAND, true) == false) { 088 useXCommand = false; 089 LOG.debug("Oozie DagEngine is not using XCommands."); 090 } 091 else { 092 LOG.debug("Oozie DagEngine is using XCommands."); 093 } 094 } 095 096 /** 097 * Create a Dag engine to perform operations on behave of a user. 098 * 099 * @param user user name. 100 * @param authToken the authentication token. 101 */ 102 public DagEngine(String user, String authToken) { 103 this(); 104 105 this.user = ParamChecker.notEmpty(user, "user"); 106 this.authToken = ParamChecker.notEmpty(authToken, "authToken"); 107 } 108 109 /** 110 * Submit a workflow job. <p/> It validates configuration properties. 111 * 112 * @param conf job configuration. 113 * @param startJob indicates if the job should be started or not. 114 * @return the job Id. 115 * @throws DagEngineException thrown if the job could not be created. 116 */ 117 @Override 118 public String submitJob(Configuration conf, boolean startJob) throws DagEngineException { 119 validateSubmitConfiguration(conf); 120 121 try { 122 String jobId; 123 if (useXCommand) { 124 SubmitXCommand submit = new SubmitXCommand(conf, getAuthToken()); 125 jobId = submit.call(); 126 } 127 else { 128 SubmitCommand submit = new SubmitCommand(conf, getAuthToken()); 129 jobId = submit.call(); 130 } 131 if (startJob) { 132 start(jobId); 133 } 134 return jobId; 135 } 136 catch (CommandException ex) { 137 throw new DagEngineException(ex); 138 } 139 } 140 141 /** 142 * Submit a pig/mapreduce job through HTTP. 143 * <p/> 144 * It validates configuration properties. 145 * 146 * @param conf job configuration. 147 * @param jobType job type - can be "pig" or "mapreduce". 148 * @return the job Id. 149 * @throws DagEngineException thrown if the job could not be created. 150 */ 151 public String submitHttpJob(Configuration conf, String jobType) throws DagEngineException { 152 validateSubmitConfiguration(conf); 153 154 try { 155 String jobId; 156 if (useXCommand) { 157 SubmitHttpXCommand submit = null; 158 if (jobType.equals("pig")) { 159 submit = new SubmitPigXCommand(conf, getAuthToken()); 160 } 161 else if (jobType.equals("mapreduce")) { 162 submit = new SubmitMRXCommand(conf, getAuthToken()); 163 } 164 165 jobId = submit.call(); 166 } 167 else { 168 SubmitHttpCommand submit = null; 169 if (jobType.equals("pig")) { 170 submit = new SubmitPigCommand(conf, getAuthToken()); 171 } 172 else if (jobType.equals("mapreduce")) { 173 submit = new SubmitMRCommand(conf, getAuthToken()); 174 } 175 176 jobId = submit.call(); 177 } 178 start(jobId); 179 return jobId; 180 } 181 catch (CommandException ex) { 182 throw new DagEngineException(ex); 183 } 184 } 185 186 private void validateSubmitConfiguration(Configuration conf) throws DagEngineException { 187 if (conf.get(OozieClient.APP_PATH) == null) { 188 throw new DagEngineException(ErrorCode.E0401, OozieClient.APP_PATH); 189 } 190 } 191 192 /** 193 * Start a job. 194 * 195 * @param jobId job Id. 196 * @throws DagEngineException thrown if the job could not be started. 197 */ 198 @Override 199 public void start(String jobId) throws DagEngineException { 200 // Changing to synchronous call from asynchronous queuing to prevent the 201 // loss of command if the queue is full or the queue is lost in case of 202 // failure. 203 try { 204 if (useXCommand) { 205 new StartXCommand(jobId).call(); 206 } 207 else { 208 new StartCommand(jobId).call(); 209 } 210 } 211 catch (CommandException e) { 212 throw new DagEngineException(e); 213 } 214 } 215 216 /** 217 * Resume a job. 218 * 219 * @param jobId job Id. 220 * @throws DagEngineException thrown if the job could not be resumed. 221 */ 222 @Override 223 public void resume(String jobId) throws DagEngineException { 224 // Changing to synchronous call from asynchronous queuing to prevent the 225 // loss of command if the queue is full or the queue is lost in case of 226 // failure. 227 try { 228 if (useXCommand) { 229 new ResumeXCommand(jobId).call(); 230 } 231 else { 232 new ResumeCommand(jobId).call(); 233 } 234 } 235 catch (CommandException e) { 236 throw new DagEngineException(e); 237 } 238 } 239 240 /** 241 * Suspend a job. 242 * 243 * @param jobId job Id. 244 * @throws DagEngineException thrown if the job could not be suspended. 245 */ 246 @Override 247 public void suspend(String jobId) throws DagEngineException { 248 // Changing to synchronous call from asynchronous queuing to prevent the 249 // loss of command if the queue is full or the queue is lost in case of 250 // failure. 251 try { 252 if (useXCommand) { 253 new SuspendXCommand(jobId).call(); 254 } 255 else { 256 new SuspendCommand(jobId).call(); 257 } 258 } 259 catch (CommandException e) { 260 throw new DagEngineException(e); 261 } 262 } 263 264 /** 265 * Kill a job. 266 * 267 * @param jobId job Id. 268 * @throws DagEngineException thrown if the job could not be killed. 269 */ 270 @Override 271 public void kill(String jobId) throws DagEngineException { 272 // Changing to synchronous call from asynchronous queuing to prevent the 273 // loss of command if the queue is full or the queue is lost in case of 274 // failure. 275 try { 276 if (useXCommand) { 277 new KillXCommand(jobId).call(); 278 } 279 else { 280 new KillCommand(jobId).call(); 281 } 282 LOG.info("User " + user + " killed the WF job " + jobId); 283 } 284 catch (CommandException e) { 285 throw new DagEngineException(e); 286 } 287 } 288 289 /* (non-Javadoc) 290 * @see org.apache.oozie.BaseEngine#change(java.lang.String, java.lang.String) 291 */ 292 @Override 293 public void change(String jobId, String changeValue) throws DagEngineException { 294 // This code should not be reached. 295 throw new DagEngineException(ErrorCode.E1017); 296 } 297 298 /** 299 * Rerun a job. 300 * 301 * @param jobId job Id to rerun. 302 * @param conf configuration information for the rerun. 303 * @throws DagEngineException thrown if the job could not be rerun. 304 */ 305 @Override 306 public void reRun(String jobId, Configuration conf) throws DagEngineException { 307 try { 308 validateReRunConfiguration(conf); 309 310 if (useXCommand) { 311 new ReRunXCommand(jobId, conf, getAuthToken()).call(); 312 } 313 else { 314 new ReRunCommand(jobId, conf, getAuthToken()).call(); 315 } 316 start(jobId); 317 } 318 catch (CommandException ex) { 319 throw new DagEngineException(ex); 320 } 321 } 322 323 private void validateReRunConfiguration(Configuration conf) throws DagEngineException { 324 if (conf.get(OozieClient.APP_PATH) == null) { 325 throw new DagEngineException(ErrorCode.E0401, OozieClient.APP_PATH); 326 } 327 if (conf.get(OozieClient.RERUN_SKIP_NODES) == null && conf.get(OozieClient.RERUN_FAIL_NODES) == null) { 328 throw new DagEngineException(ErrorCode.E0401, OozieClient.RERUN_SKIP_NODES + " OR " 329 + OozieClient.RERUN_FAIL_NODES); 330 } 331 if (conf.get(OozieClient.RERUN_SKIP_NODES) != null && conf.get(OozieClient.RERUN_FAIL_NODES) != null) { 332 throw new DagEngineException(ErrorCode.E0404, OozieClient.RERUN_SKIP_NODES + " OR " 333 + OozieClient.RERUN_FAIL_NODES); 334 } 335 } 336 337 /** 338 * Process an action callback. 339 * 340 * @param actionId the action Id. 341 * @param externalStatus the action external status. 342 * @param actionData the action output data, <code>null</code> if none. 343 * @throws DagEngineException thrown if the callback could not be processed. 344 */ 345 public void processCallback(String actionId, String externalStatus, Properties actionData) 346 throws DagEngineException { 347 XLog.Info.get().clearParameter(XLogService.GROUP); 348 XLog.Info.get().clearParameter(XLogService.USER); 349 XCallable<Void> command = null; 350 351 if (useXCommand) { 352 command = new CompletedActionXCommand(actionId, externalStatus, actionData, HIGH_PRIORITY); 353 } 354 else { 355 command = new CompletedActionCommand(actionId, externalStatus, actionData, HIGH_PRIORITY); 356 } 357 if (!Services.get().get(CallableQueueService.class).queue(command)) { 358 LOG.warn(XLog.OPS, "queue is full or system is in SAFEMODE, ignoring callback"); 359 } 360 } 361 362 /** 363 * Return the info about a job. 364 * 365 * @param jobId job Id. 366 * @return the workflow job info. 367 * @throws DagEngineException thrown if the job info could not be obtained. 368 */ 369 @Override 370 public WorkflowJob getJob(String jobId) throws DagEngineException { 371 try { 372 if (useXCommand) { 373 return new JobXCommand(jobId).call(); 374 } 375 else { 376 return new JobCommand(jobId).call(); 377 } 378 } 379 catch (CommandException ex) { 380 throw new DagEngineException(ex); 381 } 382 } 383 384 /** 385 * Return the info about a job with actions subset. 386 * 387 * @param jobId job Id 388 * @param start starting from this index in the list of actions belonging to the job 389 * @param length number of actions to be returned 390 * @return the workflow job info. 391 * @throws DagEngineException thrown if the job info could not be obtained. 392 */ 393 @Override 394 public WorkflowJob getJob(String jobId, int start, int length) throws DagEngineException { 395 try { 396 if (useXCommand) { 397 return new JobXCommand(jobId, start, length).call(); 398 } 399 else { 400 return new JobCommand(jobId, start, length).call(); 401 } 402 } 403 catch (CommandException ex) { 404 throw new DagEngineException(ex); 405 } 406 } 407 408 /** 409 * Return the a job definition. 410 * 411 * @param jobId job Id. 412 * @return the job definition. 413 * @throws DagEngineException thrown if the job definition could no be obtained. 414 */ 415 @Override 416 public String getDefinition(String jobId) throws DagEngineException { 417 try { 418 if (useXCommand) { 419 return new DefinitionXCommand(jobId).call(); 420 } 421 else { 422 return new DefinitionCommand(jobId).call(); 423 } 424 } 425 catch (CommandException ex) { 426 throw new DagEngineException(ex); 427 } 428 } 429 430 /** 431 * Stream the log of a job. 432 * 433 * @param jobId job Id. 434 * @param writer writer to stream the log to. 435 * @throws IOException thrown if the log cannot be streamed. 436 * @throws DagEngineException thrown if there is error in getting the Workflow Information for jobId. 437 */ 438 @Override 439 public void streamLog(String jobId, Writer writer) throws IOException, DagEngineException { 440 XLogStreamer.Filter filter = new XLogStreamer.Filter(); 441 filter.setParameter(DagXLogInfoService.JOB, jobId); 442 WorkflowJob job = getJob(jobId); 443 Date lastTime = job.getEndTime(); 444 if (lastTime == null) { 445 lastTime = job.getLastModifiedTime(); 446 } 447 Services.get().get(XLogService.class).streamLog(filter, job.getCreatedTime(), lastTime, writer); 448 } 449 450 private static final Set<String> FILTER_NAMES = new HashSet<String>(); 451 452 static { 453 FILTER_NAMES.add(OozieClient.FILTER_USER); 454 FILTER_NAMES.add(OozieClient.FILTER_NAME); 455 FILTER_NAMES.add(OozieClient.FILTER_GROUP); 456 FILTER_NAMES.add(OozieClient.FILTER_STATUS); 457 } 458 459 /** 460 * Validate a jobs filter. 461 * 462 * @param filter filter to validate. 463 * @return the parsed filter. 464 * @throws DagEngineException thrown if the filter is invalid. 465 */ 466 protected Map<String, List<String>> parseFilter(String filter) throws DagEngineException { 467 Map<String, List<String>> map = new HashMap<String, List<String>>(); 468 if (filter != null) { 469 StringTokenizer st = new StringTokenizer(filter, ";"); 470 while (st.hasMoreTokens()) { 471 String token = st.nextToken(); 472 if (token.contains("=")) { 473 String[] pair = token.split("="); 474 if (pair.length != 2) { 475 throw new DagEngineException(ErrorCode.E0420, filter, "elements must be name=value pairs"); 476 } 477 if (!FILTER_NAMES.contains(pair[0])) { 478 throw new DagEngineException(ErrorCode.E0420, filter, XLog 479 .format("invalid name [{0}]", pair[0])); 480 } 481 if (pair[0].equals("status")) { 482 try { 483 WorkflowJob.Status.valueOf(pair[1]); 484 } 485 catch (IllegalArgumentException ex) { 486 throw new DagEngineException(ErrorCode.E0420, filter, XLog.format("invalid status [{0}]", 487 pair[1])); 488 } 489 } 490 List<String> list = map.get(pair[0]); 491 if (list == null) { 492 list = new ArrayList<String>(); 493 map.put(pair[0], list); 494 } 495 list.add(pair[1]); 496 } 497 else { 498 throw new DagEngineException(ErrorCode.E0420, filter, "elements must be name=value pairs"); 499 } 500 } 501 } 502 return map; 503 } 504 505 /** 506 * Return the info about a set of jobs. 507 * 508 * @param filterStr job filter. Refer to the {@link org.apache.oozie.client.OozieClient} for the filter syntax. 509 * @param start offset, base 1. 510 * @param len number of jobs to return. 511 * @return job info for all matching jobs, the jobs don't contain node action information. 512 * @throws DagEngineException thrown if the jobs info could not be obtained. 513 */ 514 public WorkflowsInfo getJobs(String filterStr, int start, int len) throws DagEngineException { 515 Map<String, List<String>> filter = parseFilter(filterStr); 516 try { 517 if (useXCommand) { 518 return new JobsXCommand(filter, start, len).call(); 519 } 520 else { 521 return new JobsCommand(filter, start, len).call(); 522 } 523 } 524 catch (CommandException dce) { 525 throw new DagEngineException(dce); 526 } 527 } 528 529 /** 530 * Return the workflow Job ID for an external ID. <p/> This is reverse lookup for recovery purposes. 531 * 532 * @param externalId external ID provided at job submission time. 533 * @return the associated workflow job ID if any, <code>null</code> if none. 534 * @throws DagEngineException thrown if the lookup could not be done. 535 */ 536 @Override 537 public String getJobIdForExternalId(String externalId) throws DagEngineException { 538 try { 539 if (useXCommand) { 540 return new ExternalIdXCommand(externalId).call(); 541 } 542 else { 543 return new ExternalIdCommand(externalId).call(); 544 } 545 } 546 catch (CommandException dce) { 547 throw new DagEngineException(dce); 548 } 549 } 550 551 @Override 552 public CoordinatorJob getCoordJob(String jobId) throws BaseEngineException { 553 throw new BaseEngineException(new XException(ErrorCode.E0301)); 554 } 555 556 @Override 557 public CoordinatorJob getCoordJob(String jobId, int start, int length) throws BaseEngineException { 558 throw new BaseEngineException(new XException(ErrorCode.E0301)); 559 } 560 561 public WorkflowActionBean getWorkflowAction(String actionId) throws BaseEngineException { 562 try { 563 if (useXCommand) { 564 return new WorkflowActionInfoXCommand(actionId).call(); 565 } 566 else { 567 return new WorkflowActionInfoCommand(actionId).call(); 568 } 569 } 570 catch (CommandException ex) { 571 throw new BaseEngineException(ex); 572 } 573 } 574 575 @Override 576 public String dryrunSubmit(Configuration conf, boolean startJob) throws BaseEngineException { 577 return null; 578 } 579 }