001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.coord;
016    
017    import java.io.IOException;
018    import java.io.StringReader;
019    import java.util.ArrayList;
020    import java.util.Date;
021    import java.util.HashSet;
022    import java.util.List;
023    import java.util.Set;
024    
025    import org.apache.hadoop.conf.Configuration;
026    import org.apache.hadoop.fs.Path;
027    import org.apache.oozie.CoordinatorActionBean;
028    import org.apache.oozie.CoordinatorActionInfo;
029    import org.apache.oozie.CoordinatorJobBean;
030    import org.apache.oozie.ErrorCode;
031    import org.apache.oozie.XException;
032    import org.apache.oozie.action.ActionExecutorException;
033    import org.apache.oozie.action.hadoop.FsActionExecutor;
034    import org.apache.oozie.client.CoordinatorAction;
035    import org.apache.oozie.client.CoordinatorJob;
036    import org.apache.oozie.client.Job;
037    import org.apache.oozie.client.SLAEvent.SlaAppType;
038    import org.apache.oozie.client.rest.RestConstants;
039    import org.apache.oozie.command.CommandException;
040    import org.apache.oozie.command.PreconditionException;
041    import org.apache.oozie.command.RerunTransitionXCommand;
042    import org.apache.oozie.command.bundle.BundleStatusUpdateXCommand;
043    import org.apache.oozie.coord.CoordELFunctions;
044    import org.apache.oozie.executor.jpa.CoordActionGetJPAExecutor;
045    import org.apache.oozie.executor.jpa.CoordJobGetActionForNominalTimeJPAExecutor;
046    import org.apache.oozie.executor.jpa.CoordJobGetActionsForDatesJPAExecutor;
047    import org.apache.oozie.executor.jpa.CoordJobGetJPAExecutor;
048    import org.apache.oozie.executor.jpa.CoordJobUpdateJPAExecutor;
049    import org.apache.oozie.executor.jpa.JPAExecutorException;
050    import org.apache.oozie.service.JPAService;
051    import org.apache.oozie.service.Services;
052    import org.apache.oozie.util.DateUtils;
053    import org.apache.oozie.util.InstrumentUtils;
054    import org.apache.oozie.util.LogUtils;
055    import org.apache.oozie.util.ParamChecker;
056    import org.apache.oozie.util.StatusUtils;
057    import org.apache.oozie.util.XConfiguration;
058    import org.apache.oozie.util.XLog;
059    import org.apache.oozie.util.XmlUtils;
060    import org.apache.oozie.util.db.SLADbOperations;
061    import org.jdom.Element;
062    import org.jdom.JDOMException;
063    
064    /**
065     * Rerun coordinator actions by a list of dates or ids. User can specify if refresh or noCleanup.
066     * <p/>
067     * The "rerunType" can be set as {@link RestConstants.JOB_COORD_RERUN_DATE} or
068     * {@link RestConstants.JOB_COORD_RERUN_ACTION}.
069     * <p/>
070     * The "refresh" is used to indicate if user wants to refresh an action's input and output events.
071     * <p/>
072     * The "noCleanup" is used to indicate if user wants to cleanup output events for given rerun actions
073     */
074    public class CoordRerunXCommand extends RerunTransitionXCommand<CoordinatorActionInfo> {
075    
076        private String rerunType;
077        private String scope;
078        private boolean refresh;
079        private boolean noCleanup;
080        private CoordinatorJobBean coordJob = null;
081        private JPAService jpaService = null;
082        protected boolean prevPending;
083    
084        /**
085         * The constructor for class {@link CoordRerunXCommand}
086         *
087         * @param jobId the job id
088         * @param rerunType rerun type {@link RestConstants.JOB_COORD_RERUN_DATE} or {@link RestConstants.JOB_COORD_RERUN_ACTION}
089         * @param scope the rerun scope for given rerunType separated by ","
090         * @param refresh true if user wants to refresh input/output dataset urls
091         * @param noCleanup false if user wants to cleanup output events for given rerun actions
092         */
093        public CoordRerunXCommand(String jobId, String rerunType, String scope, boolean refresh, boolean noCleanup) {
094            super("coord_rerun", "coord_rerun", 1);
095            this.jobId = ParamChecker.notEmpty(jobId, "jobId");
096            this.rerunType = ParamChecker.notEmpty(rerunType, "rerunType");
097            this.scope = ParamChecker.notEmpty(scope, "scope");
098            this.refresh = refresh;
099            this.noCleanup = noCleanup;
100        }
101    
102        /**
103         * Get the list of actions for given id ranges
104         *
105         * @param jobId coordinator job id
106         * @param scope the id range to rerun separated by ","
107         * @return the list of all actions to rerun
108         * @throws CommandException thrown if failed to get coordinator actions by given id range
109         */
110        private List<CoordinatorActionBean> getCoordActionsFromIds(String jobId, String scope) throws CommandException {
111            ParamChecker.notEmpty(jobId, "jobId");
112            ParamChecker.notEmpty(scope, "scope");
113    
114            Set<String> actions = new HashSet<String>();
115            String[] list = scope.split(",");
116            for (String s : list) {
117                s = s.trim();
118                if (s.contains("-")) {
119                    String[] range = s.split("-");
120                    if (range.length != 2) {
121                        throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
122                    }
123                    int start;
124                    int end;
125                    try {
126                        start = Integer.parseInt(range[0].trim());
127                        end = Integer.parseInt(range[1].trim());
128                        if (start > end) {
129                            throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
130                        }
131                    }
132                    catch (NumberFormatException ne) {
133                        throw new CommandException(ErrorCode.E0302, ne);
134                    }
135                    for (int i = start; i <= end; i++) {
136                        actions.add(jobId + "@" + i);
137                    }
138                }
139                else {
140                    try {
141                        Integer.parseInt(s);
142                    }
143                    catch (NumberFormatException ne) {
144                        throw new CommandException(ErrorCode.E0302, "format is wrong for action id'" + s
145                                + "'. Integer only.");
146                    }
147                    actions.add(jobId + "@" + s);
148                }
149            }
150    
151            List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
152            for (String id : actions) {
153                CoordinatorActionBean coordAction;
154                try {
155                    coordAction = jpaService.execute(new CoordActionGetJPAExecutor(id));
156                }
157                catch (JPAExecutorException je) {
158                    throw new CommandException(je);
159                }
160                coordActions.add(coordAction);
161                LOG.debug("Rerun coordinator for actionId='" + id + "'");
162            }
163            return coordActions;
164        }
165    
166        /**
167         * Get the list of actions for given date ranges
168         *
169         * @param jobId coordinator job id
170         * @param scope the date range to rerun separated by ","
171         * @return the list of dates to rerun
172         * @throws CommandException thrown if failed to get coordinator actions by given date range
173         */
174        private List<CoordinatorActionBean> getCoordActionsFromDates(String jobId, String scope) throws CommandException {
175            ParamChecker.notEmpty(jobId, "jobId");
176            ParamChecker.notEmpty(scope, "scope");
177    
178            Set<CoordinatorActionBean> actionSet = new HashSet<CoordinatorActionBean>();
179            String[] list = scope.split(",");
180            for (String s : list) {
181                s = s.trim();
182                if (s.contains("::")) {
183                    String[] dateRange = s.split("::");
184                    if (dateRange.length != 2) {
185                        throw new CommandException(ErrorCode.E0302, "format is wrong for date's range '" + s + "'");
186                    }
187                    Date start;
188                    Date end;
189                    try {
190                        start = DateUtils.parseDateUTC(dateRange[0].trim());
191                        end = DateUtils.parseDateUTC(dateRange[1].trim());
192                        if (start.after(end)) {
193                            throw new CommandException(ErrorCode.E0302, "start date is older than end date: '" + s + "'");
194                        }
195                    }
196                    catch (Exception e) {
197                        throw new CommandException(ErrorCode.E0302, e);
198                    }
199    
200                    List<CoordinatorActionBean> listOfActions = getActionIdsFromDateRange(jobId, start, end);
201                    actionSet.addAll(listOfActions);
202                }
203                else {
204                    try {
205                        Date date = DateUtils.parseDateUTC(s.trim());
206                        CoordinatorActionBean coordAction = jpaService
207                                .execute(new CoordJobGetActionForNominalTimeJPAExecutor(jobId, date));
208                        actionSet.add(coordAction);
209                    }
210                    catch (JPAExecutorException e) {
211                        throw new CommandException(e);
212                    }
213                    catch (Exception e) {
214                        throw new CommandException(ErrorCode.E0302, e);
215                    }
216                }
217            }
218    
219            List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
220            for (CoordinatorActionBean coordAction : actionSet) {
221                coordActions.add(coordAction);
222                LOG.debug("Rerun coordinator for actionId='" + coordAction.getId() + "'");
223            }
224            return coordActions;
225        }
226    
227        /**
228         * Get coordinator action ids between given start and end time
229         *
230         * @param jobId coordinator job id
231         * @param start start time
232         * @param end end time
233         * @return a list of coordinator actions belong to the range of start and end time
234         * @throws CommandException thrown if failed to get coordinator actions
235         */
236        private List<CoordinatorActionBean> getActionIdsFromDateRange(String jobId, Date start, Date end)
237                throws CommandException {
238            List<CoordinatorActionBean> list;
239            try {
240                list = jpaService.execute(new CoordJobGetActionsForDatesJPAExecutor(jobId, start, end));
241            }
242            catch (JPAExecutorException je) {
243                throw new CommandException(je);
244            }
245            return list;
246        }
247    
248        /**
249         * Check if all given actions are eligible to rerun.
250         *
251         * @param actions list of CoordinatorActionBean
252         * @return true if all actions are eligible to rerun
253         */
254        private boolean checkAllActionsRunnable(List<CoordinatorActionBean> coordActions) {
255            boolean ret = false;
256            for (CoordinatorActionBean coordAction : coordActions) {
257                ret = true;
258                if (!coordAction.isTerminalStatus()) {
259                    ret = false;
260                    break;
261                }
262            }
263            return ret;
264        }
265    
266        /**
267         * Cleanup output-events directories
268         *
269         * @param eAction coordinator action xml
270         * @param user user name
271         * @param group group name
272         */
273        @SuppressWarnings("unchecked")
274        private void cleanupOutputEvents(Element eAction, String user, String group) {
275            Element outputList = eAction.getChild("output-events", eAction.getNamespace());
276            if (outputList != null) {
277                for (Element data : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) {
278                    if (data.getChild("uris", data.getNamespace()) != null) {
279                        String uris = data.getChild("uris", data.getNamespace()).getTextTrim();
280                        if (uris != null) {
281                            String[] uriArr = uris.split(CoordELFunctions.INSTANCE_SEPARATOR);
282                            FsActionExecutor fsAe = new FsActionExecutor();
283                            for (String uri : uriArr) {
284                                Path path = new Path(uri);
285                                try {
286                                    fsAe.delete(user, group, path);
287                                    LOG.debug("Cleanup the output dir " + path);
288                                }
289                                catch (ActionExecutorException ae) {
290                                    LOG.warn("Failed to cleanup the output dir " + uri, ae);
291                                }
292                            }
293                        }
294    
295                    }
296                }
297            }
298            else {
299                LOG.info("No output-events defined in coordinator xml. Therefore nothing to cleanup");
300            }
301        }
302    
303        /**
304         * Refresh an action's input and ouput events.
305         *
306         * @param coordJob coordinator job bean
307         * @param coordAction coordinator action bean
308         * @throws Exception thrown if failed to materialize coordinator action
309         */
310        private void refreshAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction) throws Exception {
311            Configuration jobConf = null;
312            try {
313                jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
314            }
315            catch (IOException ioe) {
316                LOG.warn("Configuration parse error. read from DB :" + coordJob.getConf(), ioe);
317                throw new CommandException(ErrorCode.E1005, ioe);
318            }
319            String jobXml = coordJob.getJobXml();
320            Element eJob = XmlUtils.parseXml(jobXml);
321            Date actualTime = new Date();
322            String actionXml = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(), coordAction
323                    .getNominalTime(), actualTime, coordAction.getActionNumber(), jobConf, coordAction);
324            LOG.debug("Refresh Action actionId=" + coordAction.getId() + ", actionXml="
325                    + XmlUtils.prettyPrint(actionXml).toString());
326            coordAction.setActionXml(actionXml);
327        }
328    
329        /**
330         * Update an action into database table
331         *
332         * @param coordJob coordinator job bean
333         * @param coordAction coordinator action bean
334         * @param actionXml coordinator action xml
335         * @throws Exception thrown failed to update coordinator action bean or unable to write sla registration event
336         */
337        private void updateAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, String actionXml)
338                throws Exception {
339            LOG.debug("updateAction for actionId=" + coordAction.getId());
340            if (coordAction.getStatus() == CoordinatorAction.Status.TIMEDOUT) {
341                LOG.debug("Updating created time for TIMEDOUT action id =" + coordAction.getId());
342                coordAction.setCreatedTime(new Date());
343            }
344            coordAction.setStatus(CoordinatorAction.Status.WAITING);
345            coordAction.setExternalId("");
346            coordAction.setExternalStatus("");
347            coordAction.setRerunTime(new Date());
348            coordAction.setLastModifiedTime(new Date());
349            jpaService.execute(new org.apache.oozie.executor.jpa.CoordActionUpdateJPAExecutor(coordAction));
350            writeActionRegistration(coordAction.getActionXml(), coordAction, coordJob.getUser(), coordJob.getGroup());
351        }
352    
353        /**
354         * Create SLA RegistrationEvent
355         *
356         * @param actionXml action xml
357         * @param actionBean coordinator action bean
358         * @param user user name
359         * @param group group name
360         * @throws Exception thrown if unable to write sla registration event
361         */
362        private void writeActionRegistration(String actionXml, CoordinatorActionBean actionBean, String user, String group)
363                throws Exception {
364            Element eAction = XmlUtils.parseXml(actionXml);
365            Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
366            SLADbOperations.writeSlaRegistrationEvent(eSla, actionBean.getId(), SlaAppType.COORDINATOR_ACTION, user, group,
367                    LOG);
368        }
369    
370        /* (non-Javadoc)
371         * @see org.apache.oozie.command.XCommand#getEntityKey()
372         */
373        @Override
374        protected String getEntityKey() {
375            return jobId;
376        }
377    
378        /* (non-Javadoc)
379         * @see org.apache.oozie.command.XCommand#isLockRequired()
380         */
381        @Override
382        protected boolean isLockRequired() {
383            return true;
384        }
385    
386        /* (non-Javadoc)
387         * @see org.apache.oozie.command.XCommand#loadState()
388         */
389        @Override
390        protected void loadState() throws CommandException {
391            jpaService = Services.get().get(JPAService.class);
392            if (jpaService == null) {
393                throw new CommandException(ErrorCode.E0610);
394            }
395            try {
396                coordJob = jpaService.execute(new CoordJobGetJPAExecutor(jobId));
397                prevPending = coordJob.isPending();
398            }
399            catch (JPAExecutorException je) {
400                throw new CommandException(je);
401            }
402            LogUtils.setLogInfo(coordJob, logInfo);
403        }
404    
405        /* (non-Javadoc)
406         * @see org.apache.oozie.command.XCommand#verifyPrecondition()
407         */
408        @Override
409        protected void verifyPrecondition() throws CommandException, PreconditionException {
410            if (coordJob.getStatus() == CoordinatorJob.Status.KILLED
411                    || coordJob.getStatus() == CoordinatorJob.Status.FAILED) {
412                LOG.info("CoordRerunXCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid=" + jobId);
413                throw new CommandException(ErrorCode.E1018,
414                        "coordinator job is killed or failed so all actions are not eligible to rerun!");
415            }
416    
417            // no actioins have been created for PREP job
418            if (coordJob.getStatus() == CoordinatorJob.Status.PREP) {
419                LOG.info("CoordRerunXCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid=" + jobId);
420                throw new CommandException(ErrorCode.E1018,
421                        "coordinator job is PREP so no actions are materialized to rerun!");
422            }
423        }
424    
425        @Override
426        protected void eagerVerifyPrecondition() throws CommandException, PreconditionException {
427            verifyPrecondition();
428        }
429    
430        @Override
431        public void rerunChildren() throws CommandException {
432            boolean isError = false;
433            try {
434                CoordinatorActionInfo coordInfo = null;
435                InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
436                List<CoordinatorActionBean> coordActions;
437                if (rerunType.equals(RestConstants.JOB_COORD_RERUN_DATE)) {
438                    coordActions = getCoordActionsFromDates(jobId, scope);
439                }
440                else if (rerunType.equals(RestConstants.JOB_COORD_RERUN_ACTION)) {
441                    coordActions = getCoordActionsFromIds(jobId, scope);
442                }
443                else {
444                    isError = true;
445                    throw new CommandException(ErrorCode.E1018, "date or action expected.");
446                }
447                if (checkAllActionsRunnable(coordActions)) {
448                    for (CoordinatorActionBean coordAction : coordActions) {
449                        String actionXml = coordAction.getActionXml();
450                        if (!noCleanup) {
451                            Element eAction = XmlUtils.parseXml(actionXml);
452                            cleanupOutputEvents(eAction, coordJob.getUser(), coordJob.getGroup());
453                        }
454                        if (refresh) {
455                            refreshAction(coordJob, coordAction);
456                        }
457                        updateAction(coordJob, coordAction, actionXml);
458    
459                        queue(new CoordActionNotificationXCommand(coordAction), 100);
460                        queue(new CoordActionInputCheckXCommand(coordAction.getId()), 100);
461                    }
462                }
463                else {
464                    isError = true;
465                    throw new CommandException(ErrorCode.E1018, "part or all actions are not eligible to rerun!");
466                }
467                coordInfo = new CoordinatorActionInfo(coordActions);
468    
469                ret = coordInfo;
470            }
471            catch (XException xex) {
472                isError = true;
473                throw new CommandException(xex);
474            }
475            catch (JDOMException jex) {
476                isError = true;
477                throw new CommandException(ErrorCode.E0700, jex);
478            }
479            catch (Exception ex) {
480                isError = true;
481                throw new CommandException(ErrorCode.E1018, ex);
482            }
483            finally{
484                if(isError){
485                    transitToPrevious();
486                }
487            }
488        }
489    
490        /*
491         * (non-Javadoc)
492         * @see org.apache.oozie.command.TransitionXCommand#getJob()
493         */
494        @Override
495        public Job getJob() {
496            return coordJob;
497        }
498    
499        @Override
500        public void notifyParent() throws CommandException {
501            //update bundle action
502            if (getPrevStatus() != null && coordJob.getBundleId() != null) {
503                BundleStatusUpdateXCommand bundleStatusUpdate = new BundleStatusUpdateXCommand(coordJob, getPrevStatus());
504                bundleStatusUpdate.call();
505            }
506        }
507    
508        @Override
509        public void updateJob() throws CommandException {
510            try {
511                // rerun a paused coordinator job will keep job status at paused and pending at previous pending
512                if (getPrevStatus()!= null && getPrevStatus().equals(Job.Status.PAUSED)) {
513                    coordJob.setStatus(Job.Status.PAUSED);
514                    if (prevPending) {
515                        coordJob.setPending();
516                    } else {
517                        coordJob.resetPending();
518                    }
519                }
520    
521                jpaService.execute(new CoordJobUpdateJPAExecutor(coordJob));
522            }
523            catch (JPAExecutorException je) {
524                throw new CommandException(je);
525            }
526        }
527    
528        /* (non-Javadoc)
529         * @see org.apache.oozie.command.RerunTransitionXCommand#getLog()
530         */
531        @Override
532        public XLog getLog() {
533            return LOG;
534        }
535    
536        @Override
537        public final void transitToNext() {
538            prevStatus = coordJob.getStatus();
539            coordJob.setStatus(Job.Status.RUNNING);
540            // used for backward support of coordinator 0.1 schema
541            coordJob.setStatus(StatusUtils.getStatusForCoordRerun(coordJob, prevStatus));
542            coordJob.setPending();
543        }
544    
545        private final void transitToPrevious() throws CommandException {
546            coordJob.setStatus(getPrevStatus());
547            if (!prevPending) {
548                coordJob.resetPending();
549            }
550            else {
551                coordJob.setPending();
552            }
553        }
554    }