001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.command.wf; 016 017 import java.util.Date; 018 019 import javax.servlet.jsp.el.ELException; 020 021 import org.apache.hadoop.conf.Configuration; 022 import org.apache.oozie.ErrorCode; 023 import org.apache.oozie.FaultInjection; 024 import org.apache.oozie.WorkflowActionBean; 025 import org.apache.oozie.WorkflowJobBean; 026 import org.apache.oozie.XException; 027 import org.apache.oozie.action.ActionExecutor; 028 import org.apache.oozie.action.ActionExecutorException; 029 import org.apache.oozie.client.OozieClient; 030 import org.apache.oozie.client.WorkflowAction; 031 import org.apache.oozie.client.WorkflowJob; 032 import org.apache.oozie.client.SLAEvent.SlaAppType; 033 import org.apache.oozie.client.SLAEvent.Status; 034 import org.apache.oozie.command.CommandException; 035 import org.apache.oozie.command.PreconditionException; 036 import org.apache.oozie.command.coord.CoordActionUpdateXCommand; 037 import org.apache.oozie.executor.jpa.JPAExecutorException; 038 import org.apache.oozie.executor.jpa.WorkflowActionGetJPAExecutor; 039 import org.apache.oozie.executor.jpa.WorkflowActionUpdateJPAExecutor; 040 import org.apache.oozie.executor.jpa.WorkflowJobGetJPAExecutor; 041 import org.apache.oozie.executor.jpa.WorkflowJobUpdateJPAExecutor; 042 import org.apache.oozie.service.ActionService; 043 import org.apache.oozie.service.JPAService; 044 import org.apache.oozie.service.Services; 045 import org.apache.oozie.service.UUIDService; 046 import org.apache.oozie.util.ELEvaluationException; 047 import org.apache.oozie.util.Instrumentation; 048 import org.apache.oozie.util.LogUtils; 049 import org.apache.oozie.util.XLog; 050 import org.apache.oozie.util.XmlUtils; 051 import org.apache.oozie.util.db.SLADbXOperations; 052 053 public class ActionStartXCommand extends ActionXCommand<Void> { 054 public static final String EL_ERROR = "EL_ERROR"; 055 public static final String EL_EVAL_ERROR = "EL_EVAL_ERROR"; 056 public static final String COULD_NOT_START = "COULD_NOT_START"; 057 public static final String START_DATA_MISSING = "START_DATA_MISSING"; 058 public static final String EXEC_DATA_MISSING = "EXEC_DATA_MISSING"; 059 060 private String jobId = null; 061 private String actionId = null; 062 private WorkflowJobBean wfJob = null; 063 private WorkflowActionBean wfAction = null; 064 private JPAService jpaService = null; 065 private ActionExecutor executor = null; 066 067 public ActionStartXCommand(String actionId, String type) { 068 super("action.start", type, 0); 069 this.actionId = actionId; 070 this.jobId = Services.get().get(UUIDService.class).getId(actionId); 071 } 072 073 @Override 074 protected boolean isLockRequired() { 075 return true; 076 } 077 078 @Override 079 protected String getEntityKey() { 080 return this.jobId; 081 } 082 083 @Override 084 protected void loadState() throws CommandException { 085 try { 086 jpaService = Services.get().get(JPAService.class); 087 if (jpaService != null) { 088 this.wfJob = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId)); 089 this.wfAction = jpaService.execute(new WorkflowActionGetJPAExecutor(actionId)); 090 LogUtils.setLogInfo(wfJob, logInfo); 091 LogUtils.setLogInfo(wfAction, logInfo); 092 } 093 else { 094 throw new CommandException(ErrorCode.E0610); 095 } 096 } 097 catch (XException ex) { 098 throw new CommandException(ex); 099 } 100 } 101 102 @Override 103 protected void verifyPrecondition() throws CommandException, PreconditionException { 104 if (wfJob == null) { 105 throw new PreconditionException(ErrorCode.E0604, jobId); 106 } 107 if (wfAction == null) { 108 throw new PreconditionException(ErrorCode.E0605, actionId); 109 } 110 if (wfAction.isPending() 111 && (wfAction.getStatus() == WorkflowActionBean.Status.PREP 112 || wfAction.getStatus() == WorkflowActionBean.Status.START_RETRY || wfAction.getStatus() == WorkflowActionBean.Status.START_MANUAL)) { 113 if (wfJob.getStatus() != WorkflowJob.Status.RUNNING) { 114 throw new PreconditionException(ErrorCode.E0810, WorkflowJob.Status.RUNNING.toString()); 115 } 116 } 117 else { 118 throw new PreconditionException(ErrorCode.E0816, wfAction.getPending(), wfAction.getStatusStr()); 119 } 120 121 executor = Services.get().get(ActionService.class).getExecutor(wfAction.getType()); 122 if (executor == null) { 123 throw new CommandException(ErrorCode.E0802, wfAction.getType()); 124 } 125 } 126 127 @Override 128 protected Void execute() throws CommandException { 129 130 LOG.debug("STARTED ActionStartXCommand for wf actionId=" + actionId); 131 Configuration conf = wfJob.getWorkflowInstance().getConf(); 132 133 int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries()); 134 long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval()); 135 executor.setMaxRetries(maxRetries); 136 executor.setRetryInterval(retryInterval); 137 138 ActionExecutorContext context = null; 139 try { 140 boolean isRetry = false; 141 if (wfAction.getStatus() == WorkflowActionBean.Status.START_RETRY 142 || wfAction.getStatus() == WorkflowActionBean.Status.START_MANUAL) { 143 isRetry = true; 144 } 145 context = new ActionXCommand.ActionExecutorContext(wfJob, wfAction, isRetry); 146 try { 147 String tmpActionConf = XmlUtils.removeComments(wfAction.getConf()); 148 String actionConf = context.getELEvaluator().evaluate(tmpActionConf, String.class); 149 wfAction.setConf(actionConf); 150 151 LOG.debug("Start, name [{0}] type [{1}] configuration{E}{E}{2}{E}", wfAction.getName(), wfAction 152 .getType(), actionConf); 153 154 } 155 catch (ELEvaluationException ex) { 156 throw new ActionExecutorException(ActionExecutorException.ErrorType.TRANSIENT, EL_EVAL_ERROR, ex 157 .getMessage(), ex); 158 } 159 catch (ELException ex) { 160 context.setErrorInfo(EL_ERROR, ex.getMessage()); 161 LOG.warn("ELException in ActionStartXCommand ", ex.getMessage(), ex); 162 handleError(context, wfJob, wfAction); 163 return null; 164 } 165 catch (org.jdom.JDOMException je) { 166 context.setErrorInfo("ParsingError", je.getMessage()); 167 LOG.warn("JDOMException in ActionStartXCommand ", je.getMessage(), je); 168 handleError(context, wfJob, wfAction); 169 return null; 170 } 171 catch (Exception ex) { 172 context.setErrorInfo(EL_ERROR, ex.getMessage()); 173 LOG.warn("Exception in ActionStartXCommand ", ex.getMessage(), ex); 174 handleError(context, wfJob, wfAction); 175 return null; 176 } 177 wfAction.setErrorInfo(null, null); 178 incrActionCounter(wfAction.getType(), 1); 179 180 Instrumentation.Cron cron = new Instrumentation.Cron(); 181 cron.start(); 182 executor.start(context, wfAction); 183 cron.stop(); 184 FaultInjection.activate("org.apache.oozie.command.SkipCommitFaultInjection"); 185 addActionCron(wfAction.getType(), cron); 186 187 wfAction.setRetries(0); 188 if (wfAction.isExecutionComplete()) { 189 if (!context.isExecuted()) { 190 LOG.warn(XLog.OPS, "Action Completed, ActionExecutor [{0}] must call setExecutionData()", executor 191 .getType()); 192 wfAction.setErrorInfo(EXEC_DATA_MISSING, 193 "Execution Complete, but Execution Data Missing from Action"); 194 failJob(context); 195 jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction)); 196 jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob)); 197 return null; 198 } 199 wfAction.setPending(); 200 queue(new ActionEndXCommand(wfAction.getId(), wfAction.getType())); 201 } 202 else { 203 if (!context.isStarted()) { 204 LOG.warn(XLog.OPS, "Action Started, ActionExecutor [{0}] must call setStartData()", executor 205 .getType()); 206 wfAction.setErrorInfo(START_DATA_MISSING, "Execution Started, but Start Data Missing from Action"); 207 failJob(context); 208 jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction)); 209 jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob)); 210 return null; 211 } 212 queue(new NotificationXCommand(wfJob, wfAction)); 213 } 214 215 LOG.warn(XLog.STD, "[***" + wfAction.getId() + "***]" + "Action status=" + wfAction.getStatusStr()); 216 217 jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction)); 218 jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob)); 219 // Add SLA status event (STARTED) for WF_ACTION 220 SLADbXOperations.writeStausEvent(wfAction.getSlaXml(), wfAction.getId(), Status.STARTED, 221 SlaAppType.WORKFLOW_ACTION); 222 LOG.warn(XLog.STD, "[***" + wfAction.getId() + "***]" + "Action updated in DB!"); 223 224 } 225 catch (ActionExecutorException ex) { 226 LOG.warn("Error starting action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]", 227 wfAction.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage(), ex); 228 wfAction.setErrorInfo(ex.getErrorCode(), ex.getMessage()); 229 switch (ex.getErrorType()) { 230 case TRANSIENT: 231 if (!handleTransient(context, executor, WorkflowAction.Status.START_RETRY)) { 232 handleNonTransient(context, executor, WorkflowAction.Status.START_MANUAL); 233 wfAction.setPendingAge(new Date()); 234 wfAction.setRetries(0); 235 wfAction.setStartTime(null); 236 } 237 break; 238 case NON_TRANSIENT: 239 handleNonTransient(context, executor, WorkflowAction.Status.START_MANUAL); 240 break; 241 case ERROR: 242 handleError(context, executor, WorkflowAction.Status.ERROR.toString(), true, 243 WorkflowAction.Status.DONE); 244 break; 245 case FAILED: 246 try { 247 failJob(context); 248 // update coordinator action 249 new CoordActionUpdateXCommand(wfJob, 3).call(); 250 new WfEndXCommand(wfJob).call(); //To delete the WF temp dir 251 SLADbXOperations.writeStausEvent(wfAction.getSlaXml(), wfAction.getId(), Status.FAILED, 252 SlaAppType.WORKFLOW_ACTION); 253 SLADbXOperations.writeStausEvent(wfJob.getSlaXml(), wfJob.getId(), Status.FAILED, 254 SlaAppType.WORKFLOW_JOB); 255 } 256 catch (XException x) { 257 LOG.warn("ActionStartXCommand - case:FAILED ", x.getMessage()); 258 } 259 break; 260 } 261 try { 262 jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction)); 263 jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob)); 264 } 265 catch (JPAExecutorException je) { 266 throw new CommandException(je); 267 } 268 } 269 catch (JPAExecutorException je) { 270 throw new CommandException(je); 271 } 272 273 LOG.debug("ENDED ActionStartXCommand for wf actionId=" + actionId + ", jobId=" + jobId); 274 275 return null; 276 } 277 278 private void handleError(ActionExecutorContext context, WorkflowJobBean workflow, WorkflowActionBean action) 279 throws CommandException { 280 failJob(context); 281 try { 282 jpaService.execute(new WorkflowActionUpdateJPAExecutor(action)); 283 jpaService.execute(new WorkflowJobUpdateJPAExecutor(workflow)); 284 } 285 catch (JPAExecutorException je) { 286 throw new CommandException(je); 287 } 288 SLADbXOperations.writeStausEvent(action.getSlaXml(), action.getId(), Status.FAILED, SlaAppType.WORKFLOW_ACTION); 289 SLADbXOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), Status.FAILED, SlaAppType.WORKFLOW_JOB); 290 // update coordinator action 291 new CoordActionUpdateXCommand(workflow, 3).call(); 292 new WfEndXCommand(wfJob).call(); //To delete the WF temp dir 293 return; 294 } 295 296 }