001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.wf;
016    
017    import java.io.IOException;
018    import java.io.StringReader;
019    import java.net.URI;
020    import java.net.URISyntaxException;
021    import java.util.Date;
022    import java.util.Properties;
023    
024    import org.apache.hadoop.conf.Configuration;
025    import org.apache.hadoop.fs.FileSystem;
026    import org.apache.hadoop.fs.Path;
027    import org.apache.oozie.DagELFunctions;
028    import org.apache.oozie.ErrorCode;
029    import org.apache.oozie.WorkflowActionBean;
030    import org.apache.oozie.WorkflowJobBean;
031    import org.apache.oozie.action.ActionExecutor;
032    import org.apache.oozie.client.WorkflowAction;
033    import org.apache.oozie.client.WorkflowJob;
034    import org.apache.oozie.command.CommandException;
035    import org.apache.oozie.command.coord.CoordActionUpdateXCommand;
036    import org.apache.oozie.service.CallbackService;
037    import org.apache.oozie.service.ELService;
038    import org.apache.oozie.service.HadoopAccessorException;
039    import org.apache.oozie.service.HadoopAccessorService;
040    import org.apache.oozie.service.JPAService;
041    import org.apache.oozie.service.Services;
042    import org.apache.oozie.util.ELEvaluator;
043    import org.apache.oozie.util.InstrumentUtils;
044    import org.apache.oozie.util.Instrumentation;
045    import org.apache.oozie.util.XConfiguration;
046    import org.apache.oozie.workflow.WorkflowException;
047    import org.apache.oozie.workflow.WorkflowInstance;
048    import org.apache.oozie.workflow.lite.LiteWorkflowInstance;
049    
050    /**
051     * Base class for Action execution commands. Provides common functionality to handle different types of errors while
052     * attempting to start or end an action.
053     */
054    public abstract class ActionXCommand<T> extends WorkflowXCommand<Void> {
055        private static final String INSTRUMENTATION_GROUP = "action.executors";
056    
057        protected static final String INSTR_FAILED_JOBS_COUNTER = "failed";
058    
059        protected static final String RECOVERY_ID_SEPARATOR = "@";
060    
061        public ActionXCommand(String name, String type, int priority) {
062            super(name, type, priority);
063        }
064    
065        /**
066         * Takes care of Transient failures. Sets the action status to retry and increments the retry count if not enough
067         * attempts have been made. Otherwise returns false.
068         *
069         * @param context the execution context.
070         * @param executor the executor instance being used.
071         * @param status the status to be set for the action.
072         * @return true if the action is scheduled for another retry. false if the number of retries has exceeded the
073         *         maximum number of configured retries.
074         * @throws CommandException thrown if unable to handle transient
075         */
076        protected boolean handleTransient(ActionExecutor.Context context, ActionExecutor executor,
077                WorkflowAction.Status status) throws CommandException {
078            LOG.debug("Attempting to retry");
079            ActionExecutorContext aContext = (ActionExecutorContext) context;
080            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
081            incrActionErrorCounter(action.getType(), "transient", 1);
082    
083            int actionRetryCount = action.getRetries();
084            if (actionRetryCount >= executor.getMaxRetries()) {
085                LOG.warn("Exceeded max retry count [{0}]. Suspending Job", executor.getMaxRetries());
086                return false;
087            }
088            else {
089                action.setStatus(status);
090                action.setPending();
091                action.incRetries();
092                long retryDelayMillis = executor.getRetryInterval() * 1000;
093                action.setPendingAge(new Date(System.currentTimeMillis() + retryDelayMillis));
094                LOG.info("Next Retry, Attempt Number [{0}] in [{1}] milliseconds", actionRetryCount + 1, retryDelayMillis);
095                this.resetUsed();
096                queue(this, retryDelayMillis);
097                return true;
098            }
099        }
100    
101        /**
102         * Takes care of non transient failures. The job is suspended, and the state of the action is changed to *MANUAL and
103         * set pending flag of action to false
104         *
105         * @param context the execution context.
106         * @param executor the executor instance being used.
107         * @param status the status to be set for the action.
108         * @throws CommandException thrown if unable to suspend job
109         */
110        protected void handleNonTransient(ActionExecutor.Context context, ActionExecutor executor,
111                WorkflowAction.Status status) throws CommandException {
112            ActionExecutorContext aContext = (ActionExecutorContext) context;
113            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
114            incrActionErrorCounter(action.getType(), "nontransient", 1);
115            WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow();
116            String id = workflow.getId();
117            action.setStatus(status);
118            action.resetPendingOnly();
119            LOG.warn("Suspending Workflow Job id=" + id);
120            try {
121                SuspendXCommand.suspendJob(Services.get().get(JPAService.class), workflow, id, action.getId());
122            }
123            catch (Exception e) {
124                throw new CommandException(ErrorCode.E0727, e.getMessage());
125            }
126            finally {
127                // update coordinator action
128                new CoordActionUpdateXCommand(workflow, 3).call();
129            }
130        }
131    
132        /**
133         * Takes care of errors. </p> For errors while attempting to start the action, the job state is updated and an
134         * {@link ActionEndCommand} is queued. </p> For errors while attempting to end the action, the job state is updated.
135         * </p>
136         *
137         * @param context the execution context.
138         * @param executor the executor instance being used.
139         * @param message
140         * @param isStart whether the error was generated while starting or ending an action.
141         * @param status the status to be set for the action.
142         * @throws CommandException thrown if unable to handle action error
143         */
144        protected void handleError(ActionExecutor.Context context, ActionExecutor executor, String message,
145                boolean isStart, WorkflowAction.Status status) throws CommandException {
146            LOG.warn("Setting Action Status to [{0}]", status);
147            ActionExecutorContext aContext = (ActionExecutorContext) context;
148            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
149            incrActionErrorCounter(action.getType(), "error", 1);
150            action.setPending();
151            if (isStart) {
152                action.setExecutionData(message, null);
153                queue(new ActionEndXCommand(action.getId(), action.getType()));
154            }
155            else {
156                action.setEndData(status, WorkflowAction.Status.ERROR.toString());
157            }
158        }
159    
160        /**
161         * Fail the job due to failed action
162         *
163         * @param context the execution context.
164         * @throws CommandException thrown if unable to fail job
165         */
166        public void failJob(ActionExecutor.Context context) throws CommandException {
167            ActionExecutorContext aContext = (ActionExecutorContext) context;
168            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
169            incrActionErrorCounter(action.getType(), "failed", 1);
170            WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow();
171            LOG.warn("Failing Job due to failed action [{0}]", action.getName());
172            try {
173                workflow.getWorkflowInstance().fail(action.getName());
174                WorkflowInstance wfInstance = workflow.getWorkflowInstance();
175                ((LiteWorkflowInstance) wfInstance).setStatus(WorkflowInstance.Status.FAILED);
176                workflow.setWorkflowInstance(wfInstance);
177                workflow.setStatus(WorkflowJob.Status.FAILED);
178                action.setStatus(WorkflowAction.Status.FAILED);
179                action.resetPending();
180                queue(new NotificationXCommand(workflow, action));
181                queue(new KillXCommand(workflow.getId()));
182                InstrumentUtils.incrJobCounter(INSTR_FAILED_JOBS_COUNTER, 1, getInstrumentation());
183            }
184            catch (WorkflowException ex) {
185                throw new CommandException(ex);
186            }
187        }
188    
189        private void incrActionErrorCounter(String type, String error, int count) {
190            getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#ex." + error, count);
191        }
192    
193        protected void incrActionCounter(String type, int count) {
194            getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#" + getName(), count);
195        }
196    
197        protected void addActionCron(String type, Instrumentation.Cron cron) {
198            getInstrumentation().addCron(INSTRUMENTATION_GROUP, type + "#" + getName(), cron);
199        }
200    
201        /**
202         * Workflow action executor context
203         *
204         */
205        public static class ActionExecutorContext implements ActionExecutor.Context {
206            private final WorkflowJobBean workflow;
207            private Configuration protoConf;
208            private final WorkflowActionBean action;
209            private final boolean isRetry;
210            private boolean started;
211            private boolean ended;
212            private boolean executed;
213    
214            public ActionExecutorContext(WorkflowJobBean workflow, WorkflowActionBean action, boolean isRetry) {
215                this.workflow = workflow;
216                this.action = action;
217                this.isRetry = isRetry;
218                try {
219                    protoConf = new XConfiguration(new StringReader(workflow.getProtoActionConf()));
220                }
221                catch (IOException ex) {
222                    throw new RuntimeException("It should not happen", ex);
223                }
224            }
225    
226            public String getCallbackUrl(String externalStatusVar) {
227                return Services.get().get(CallbackService.class).createCallBackUrl(action.getId(), externalStatusVar);
228            }
229    
230            public Configuration getProtoActionConf() {
231                return protoConf;
232            }
233    
234            public WorkflowJob getWorkflow() {
235                return workflow;
236            }
237    
238            public WorkflowAction getAction() {
239                return action;
240            }
241    
242            public ELEvaluator getELEvaluator() {
243                ELEvaluator evaluator = Services.get().get(ELService.class).createEvaluator("workflow");
244                DagELFunctions.configureEvaluator(evaluator, workflow, action);
245                return evaluator;
246            }
247    
248            public void setVar(String name, String value) {
249                name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name;
250                WorkflowInstance wfInstance = workflow.getWorkflowInstance();
251                wfInstance.setVar(name, value);
252                workflow.setWorkflowInstance(wfInstance);
253            }
254    
255            public String getVar(String name) {
256                name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name;
257                return workflow.getWorkflowInstance().getVar(name);
258            }
259    
260            public void setStartData(String externalId, String trackerUri, String consoleUrl) {
261                action.setStartData(externalId, trackerUri, consoleUrl);
262                started = true;
263            }
264    
265            public void setExecutionData(String externalStatus, Properties actionData) {
266                action.setExecutionData(externalStatus, actionData);
267                executed = true;
268            }
269    
270            public void setEndData(WorkflowAction.Status status, String signalValue) {
271                action.setEndData(status, signalValue);
272                ended = true;
273            }
274    
275            public boolean isRetry() {
276                return isRetry;
277            }
278    
279            /**
280             * Returns whether setStartData has been called or not.
281             *
282             * @return true if start completion info has been set.
283             */
284            public boolean isStarted() {
285                return started;
286            }
287    
288            /**
289             * Returns whether setExecutionData has been called or not.
290             *
291             * @return true if execution completion info has been set, otherwise false.
292             */
293            public boolean isExecuted() {
294                return executed;
295            }
296    
297            /**
298             * Returns whether setEndData has been called or not.
299             *
300             * @return true if end completion info has been set.
301             */
302            public boolean isEnded() {
303                return ended;
304            }
305    
306            public void setExternalStatus(String externalStatus) {
307                action.setExternalStatus(externalStatus);
308            }
309    
310            @Override
311            public String getRecoveryId() {
312                return action.getId() + RECOVERY_ID_SEPARATOR + workflow.getRun();
313            }
314    
315            /* (non-Javadoc)
316             * @see org.apache.oozie.action.ActionExecutor.Context#getActionDir()
317             */
318            public Path getActionDir() throws HadoopAccessorException, IOException, URISyntaxException {
319                String name = getWorkflow().getId() + "/" + action.getName() + "--" + action.getType();
320                FileSystem fs = getAppFileSystem();
321                String actionDirPath = Services.get().getSystemId() + "/" + name;
322                Path fqActionDir = new Path(fs.getHomeDirectory(), actionDirPath);
323                return fqActionDir;
324            }
325    
326            /* (non-Javadoc)
327             * @see org.apache.oozie.action.ActionExecutor.Context#getAppFileSystem()
328             */
329            public FileSystem getAppFileSystem() throws HadoopAccessorException, IOException, URISyntaxException {
330                WorkflowJob workflow = getWorkflow();
331                XConfiguration jobConf = new XConfiguration(new StringReader(workflow.getConf()));
332                Configuration fsConf = new Configuration();
333                XConfiguration.copy(jobConf, fsConf);
334                return Services.get().get(HadoopAccessorService.class).createFileSystem(workflow.getUser(),
335                        workflow.getGroup(), new URI(getWorkflow().getAppPath()), fsConf);
336    
337            }
338    
339            /* (non-Javadoc)
340             * @see org.apache.oozie.action.ActionExecutor.Context#setErrorInfo(java.lang.String, java.lang.String)
341             */
342            @Override
343            public void setErrorInfo(String str, String exMsg) {
344                action.setErrorInfo(str, exMsg);
345            }
346        }
347    
348    }