001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.wf;
016    
017    import java.util.Date;
018    
019    import javax.servlet.jsp.el.ELException;
020    
021    import org.apache.hadoop.conf.Configuration;
022    import org.apache.oozie.ErrorCode;
023    import org.apache.oozie.FaultInjection;
024    import org.apache.oozie.WorkflowActionBean;
025    import org.apache.oozie.WorkflowJobBean;
026    import org.apache.oozie.XException;
027    import org.apache.oozie.action.ActionExecutor;
028    import org.apache.oozie.action.ActionExecutorException;
029    import org.apache.oozie.client.OozieClient;
030    import org.apache.oozie.client.WorkflowAction;
031    import org.apache.oozie.client.WorkflowJob;
032    import org.apache.oozie.client.SLAEvent.SlaAppType;
033    import org.apache.oozie.client.SLAEvent.Status;
034    import org.apache.oozie.command.CommandException;
035    import org.apache.oozie.command.PreconditionException;
036    import org.apache.oozie.command.coord.CoordActionUpdateXCommand;
037    import org.apache.oozie.executor.jpa.JPAExecutorException;
038    import org.apache.oozie.executor.jpa.WorkflowActionGetJPAExecutor;
039    import org.apache.oozie.executor.jpa.WorkflowActionUpdateJPAExecutor;
040    import org.apache.oozie.executor.jpa.WorkflowJobGetJPAExecutor;
041    import org.apache.oozie.executor.jpa.WorkflowJobUpdateJPAExecutor;
042    import org.apache.oozie.service.ActionService;
043    import org.apache.oozie.service.JPAService;
044    import org.apache.oozie.service.Services;
045    import org.apache.oozie.service.UUIDService;
046    import org.apache.oozie.util.ELEvaluationException;
047    import org.apache.oozie.util.Instrumentation;
048    import org.apache.oozie.util.LogUtils;
049    import org.apache.oozie.util.XLog;
050    import org.apache.oozie.util.XmlUtils;
051    import org.apache.oozie.util.db.SLADbXOperations;
052    
053    public class ActionStartXCommand extends ActionXCommand<Void> {
054        public static final String EL_ERROR = "EL_ERROR";
055        public static final String EL_EVAL_ERROR = "EL_EVAL_ERROR";
056        public static final String COULD_NOT_START = "COULD_NOT_START";
057        public static final String START_DATA_MISSING = "START_DATA_MISSING";
058        public static final String EXEC_DATA_MISSING = "EXEC_DATA_MISSING";
059    
060        private String jobId = null;
061        private String actionId = null;
062        private WorkflowJobBean wfJob = null;
063        private WorkflowActionBean wfAction = null;
064        private JPAService jpaService = null;
065        private ActionExecutor executor = null;
066    
067        public ActionStartXCommand(String actionId, String type) {
068            super("action.start", type, 0);
069            this.actionId = actionId;
070            this.jobId = Services.get().get(UUIDService.class).getId(actionId);
071        }
072    
073        @Override
074        protected boolean isLockRequired() {
075            return true;
076        }
077    
078        @Override
079        protected String getEntityKey() {
080            return this.jobId;
081        }
082    
083        @Override
084        protected void loadState() throws CommandException {
085            try {
086                jpaService = Services.get().get(JPAService.class);
087                if (jpaService != null) {
088                    this.wfJob = jpaService.execute(new WorkflowJobGetJPAExecutor(jobId));
089                    this.wfAction = jpaService.execute(new WorkflowActionGetJPAExecutor(actionId));
090                    LogUtils.setLogInfo(wfJob, logInfo);
091                    LogUtils.setLogInfo(wfAction, logInfo);
092                }
093                else {
094                    throw new CommandException(ErrorCode.E0610);
095                }
096            }
097            catch (XException ex) {
098                throw new CommandException(ex);
099            }
100        }
101    
102        @Override
103        protected void verifyPrecondition() throws CommandException, PreconditionException {
104            if (wfJob == null) {
105                throw new PreconditionException(ErrorCode.E0604, jobId);
106            }
107            if (wfAction == null) {
108                throw new PreconditionException(ErrorCode.E0605, actionId);
109            }
110            if (wfAction.isPending()
111                    && (wfAction.getStatus() == WorkflowActionBean.Status.PREP
112                            || wfAction.getStatus() == WorkflowActionBean.Status.START_RETRY || wfAction.getStatus() == WorkflowActionBean.Status.START_MANUAL)) {
113                if (wfJob.getStatus() != WorkflowJob.Status.RUNNING) {
114                    throw new PreconditionException(ErrorCode.E0810, WorkflowJob.Status.RUNNING.toString());
115                }
116            }
117            else {
118                throw new PreconditionException(ErrorCode.E0816, wfAction.getPending(), wfAction.getStatusStr());
119            }
120    
121            executor = Services.get().get(ActionService.class).getExecutor(wfAction.getType());
122            if (executor == null) {
123                throw new CommandException(ErrorCode.E0802, wfAction.getType());
124            }
125        }
126    
127        @Override
128        protected Void execute() throws CommandException {
129    
130            LOG.debug("STARTED ActionStartXCommand for wf actionId=" + actionId);
131            Configuration conf = wfJob.getWorkflowInstance().getConf();
132    
133            int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
134            long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
135            executor.setMaxRetries(maxRetries);
136            executor.setRetryInterval(retryInterval);
137    
138            ActionExecutorContext context = null;
139            try {
140                boolean isRetry = false;
141                if (wfAction.getStatus() == WorkflowActionBean.Status.START_RETRY
142                        || wfAction.getStatus() == WorkflowActionBean.Status.START_MANUAL) {
143                    isRetry = true;
144                }
145                context = new ActionXCommand.ActionExecutorContext(wfJob, wfAction, isRetry);
146                try {
147                    String tmpActionConf = XmlUtils.removeComments(wfAction.getConf());
148                    String actionConf = context.getELEvaluator().evaluate(tmpActionConf, String.class);
149                    wfAction.setConf(actionConf);
150    
151                    LOG.debug("Start, name [{0}] type [{1}] configuration{E}{E}{2}{E}", wfAction.getName(), wfAction
152                            .getType(), actionConf);
153    
154                }
155                catch (ELEvaluationException ex) {
156                    throw new ActionExecutorException(ActionExecutorException.ErrorType.TRANSIENT, EL_EVAL_ERROR, ex
157                            .getMessage(), ex);
158                }
159                catch (ELException ex) {
160                    context.setErrorInfo(EL_ERROR, ex.getMessage());
161                    LOG.warn("ELException in ActionStartXCommand ", ex.getMessage(), ex);
162                    handleError(context, wfJob, wfAction);
163                    return null;
164                }
165                catch (org.jdom.JDOMException je) {
166                    context.setErrorInfo("ParsingError", je.getMessage());
167                    LOG.warn("JDOMException in ActionStartXCommand ", je.getMessage(), je);
168                    handleError(context, wfJob, wfAction);
169                    return null;
170                }
171                catch (Exception ex) {
172                    context.setErrorInfo(EL_ERROR, ex.getMessage());
173                    LOG.warn("Exception in ActionStartXCommand ", ex.getMessage(), ex);
174                    handleError(context, wfJob, wfAction);
175                    return null;
176                }
177                wfAction.setErrorInfo(null, null);
178                incrActionCounter(wfAction.getType(), 1);
179    
180                Instrumentation.Cron cron = new Instrumentation.Cron();
181                cron.start();
182                executor.start(context, wfAction);
183                cron.stop();
184                FaultInjection.activate("org.apache.oozie.command.SkipCommitFaultInjection");
185                addActionCron(wfAction.getType(), cron);
186    
187                wfAction.setRetries(0);
188                if (wfAction.isExecutionComplete()) {
189                    if (!context.isExecuted()) {
190                        LOG.warn(XLog.OPS, "Action Completed, ActionExecutor [{0}] must call setExecutionData()", executor
191                                .getType());
192                        wfAction.setErrorInfo(EXEC_DATA_MISSING,
193                                "Execution Complete, but Execution Data Missing from Action");
194                        failJob(context);
195                        jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction));
196                        jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob));
197                        return null;
198                    }
199                    wfAction.setPending();
200                    queue(new ActionEndXCommand(wfAction.getId(), wfAction.getType()));
201                }
202                else {
203                    if (!context.isStarted()) {
204                        LOG.warn(XLog.OPS, "Action Started, ActionExecutor [{0}] must call setStartData()", executor
205                                .getType());
206                        wfAction.setErrorInfo(START_DATA_MISSING, "Execution Started, but Start Data Missing from Action");
207                        failJob(context);
208                        jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction));
209                        jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob));
210                        return null;
211                    }
212                    queue(new NotificationXCommand(wfJob, wfAction));
213                }
214    
215                LOG.warn(XLog.STD, "[***" + wfAction.getId() + "***]" + "Action status=" + wfAction.getStatusStr());
216    
217                jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction));
218                jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob));
219                // Add SLA status event (STARTED) for WF_ACTION
220                SLADbXOperations.writeStausEvent(wfAction.getSlaXml(), wfAction.getId(), Status.STARTED,
221                        SlaAppType.WORKFLOW_ACTION);
222                LOG.warn(XLog.STD, "[***" + wfAction.getId() + "***]" + "Action updated in DB!");
223    
224            }
225            catch (ActionExecutorException ex) {
226                LOG.warn("Error starting action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
227                        wfAction.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage(), ex);
228                wfAction.setErrorInfo(ex.getErrorCode(), ex.getMessage());
229                switch (ex.getErrorType()) {
230                    case TRANSIENT:
231                        if (!handleTransient(context, executor, WorkflowAction.Status.START_RETRY)) {
232                            handleNonTransient(context, executor, WorkflowAction.Status.START_MANUAL);
233                            wfAction.setPendingAge(new Date());
234                            wfAction.setRetries(0);
235                            wfAction.setStartTime(null);
236                        }
237                        break;
238                    case NON_TRANSIENT:
239                        handleNonTransient(context, executor, WorkflowAction.Status.START_MANUAL);
240                        break;
241                    case ERROR:
242                        handleError(context, executor, WorkflowAction.Status.ERROR.toString(), true,
243                                WorkflowAction.Status.DONE);
244                        break;
245                    case FAILED:
246                        try {
247                            failJob(context);
248                            // update coordinator action
249                            new CoordActionUpdateXCommand(wfJob, 3).call();
250                            new WfEndXCommand(wfJob).call(); //To delete the WF temp dir
251                            SLADbXOperations.writeStausEvent(wfAction.getSlaXml(), wfAction.getId(), Status.FAILED,
252                                    SlaAppType.WORKFLOW_ACTION);
253                            SLADbXOperations.writeStausEvent(wfJob.getSlaXml(), wfJob.getId(), Status.FAILED,
254                                    SlaAppType.WORKFLOW_JOB);
255                        }
256                        catch (XException x) {
257                            LOG.warn("ActionStartXCommand - case:FAILED ", x.getMessage());
258                        }
259                        break;
260                }
261                try {
262                    jpaService.execute(new WorkflowActionUpdateJPAExecutor(wfAction));
263                    jpaService.execute(new WorkflowJobUpdateJPAExecutor(wfJob));
264                }
265                catch (JPAExecutorException je) {
266                    throw new CommandException(je);
267                }
268            }
269            catch (JPAExecutorException je) {
270                throw new CommandException(je);
271            }
272    
273            LOG.debug("ENDED ActionStartXCommand for wf actionId=" + actionId + ", jobId=" + jobId);
274    
275            return null;
276        }
277    
278        private void handleError(ActionExecutorContext context, WorkflowJobBean workflow, WorkflowActionBean action)
279                throws CommandException {
280            failJob(context);
281            try {
282                jpaService.execute(new WorkflowActionUpdateJPAExecutor(action));
283                jpaService.execute(new WorkflowJobUpdateJPAExecutor(workflow));
284            }
285            catch (JPAExecutorException je) {
286                throw new CommandException(je);
287            }
288            SLADbXOperations.writeStausEvent(action.getSlaXml(), action.getId(), Status.FAILED, SlaAppType.WORKFLOW_ACTION);
289            SLADbXOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), Status.FAILED, SlaAppType.WORKFLOW_JOB);
290            // update coordinator action
291            new CoordActionUpdateXCommand(workflow, 3).call();
292            new WfEndXCommand(wfJob).call(); //To delete the WF temp dir
293            return;
294        }
295    
296    }