001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.service;
016    
017    import java.util.ArrayList;
018    import java.util.Date;
019    import java.util.HashMap;
020    import java.util.HashSet;
021    import java.util.List;
022    import java.util.Set;
023    
024    import org.apache.hadoop.conf.Configuration;
025    import org.apache.oozie.BundleActionBean;
026    import org.apache.oozie.BundleJobBean;
027    import org.apache.oozie.CoordinatorActionBean;
028    import org.apache.oozie.CoordinatorJobBean;
029    import org.apache.oozie.client.CoordinatorAction;
030    import org.apache.oozie.client.Job;
031    import org.apache.oozie.command.CommandException;
032    import org.apache.oozie.command.bundle.BundleKillXCommand;
033    import org.apache.oozie.command.bundle.BundleStatusUpdateXCommand;
034    import org.apache.oozie.executor.jpa.BundleActionsGetByLastModifiedTimeJPAExecutor;
035    import org.apache.oozie.executor.jpa.BundleActionsGetJPAExecutor;
036    import org.apache.oozie.executor.jpa.BundleJobGetJPAExecutor;
037    import org.apache.oozie.executor.jpa.BundleJobUpdateJPAExecutor;
038    import org.apache.oozie.executor.jpa.BundleJobsGetPendingJPAExecutor;
039    import org.apache.oozie.executor.jpa.BundleJobsGetRunningJPAExecutor;
040    import org.apache.oozie.executor.jpa.CoordActionsGetByLastModifiedTimeJPAExecutor;
041    import org.apache.oozie.executor.jpa.CoordJobGetActionsJPAExecutor;
042    import org.apache.oozie.executor.jpa.CoordJobGetJPAExecutor;
043    import org.apache.oozie.executor.jpa.CoordJobUpdateJPAExecutor;
044    import org.apache.oozie.executor.jpa.CoordJobsGetPendingJPAExecutor;
045    import org.apache.oozie.executor.jpa.JPAExecutorException;
046    import org.apache.oozie.util.DateUtils;
047    import org.apache.oozie.util.MemoryLocks;
048    import org.apache.oozie.util.StatusUtils;
049    import org.apache.oozie.util.XLog;
050    
/**
 * StatusTransitService is scheduled to run at the configured interval.
 * <p/>
 * It updates a job's status according to the status of its child actions. If the pending flag of every child action
 * equals 0 (job done), the job's pending flag is reset to 0. If all child actions have succeeded, the job's status
 * is set to SUCCEEDED.
 */
058    public class StatusTransitService implements Service {
059        public static final String CONF_PREFIX = Service.CONF_PREFIX + "StatusTransitService.";
060        public static final String CONF_STATUSTRANSIT_INTERVAL = CONF_PREFIX + "statusTransit.interval";
061        public static final String CONF_BACKWARD_SUPPORT_FOR_COORD_STATUS = CONF_PREFIX + "backward.support.for.coord.status";
062        private static int limit = -1;
063        private static Date lastInstanceStartTime = null;
064        private final static XLog LOG = XLog.getLog(StatusTransitRunnable.class);
065    
/**
 * StatusTransitRunnable is the runnable which is scheduled to run at the configured interval.
 * <p/>
 * It updates a job's status according to the status of its child actions. If the pending flag of every child action
 * equals 0 (job done), the job's pending flag is reset to 0. If all child actions have succeeded, the job's status
 * is set to SUCCEEDED.
 */
073        static class StatusTransitRunnable implements Runnable {
074            private JPAService jpaService = null;
075            private MemoryLocks.LockToken lock;
076    
077            public StatusTransitRunnable() {
078                jpaService = Services.get().get(JPAService.class);
079                if (jpaService == null) {
080                    LOG.error("Missing JPAService");
081                }
082            }
083    
084            public void run() {
085                try {
086                    Date curDate = new Date(); // records the start time of this service run;
087    
088                    // first check if there is some other instance running;
089                    lock = Services.get().get(MemoryLocksService.class).getWriteLock(StatusTransitService.class.getName(),
090                            lockTimeout);
091                    if (lock == null) {
092                        LOG.info("This StatusTransitService instance"
093                                + " will not run since there is already an instance running");
094                    }
095                    else {
096                        LOG.info("Acquired lock for [{0}]", StatusTransitService.class.getName());
097                        // running coord jobs transit service
098                        coordTransit();
099                        // running bundle jobs transit service
100                        bundleTransit();
101    
102                        lastInstanceStartTime = curDate;
103                    }
104                }
105                catch (Exception ex) {
106                    LOG.warn("Exception happened during StatusTransitRunnable ", ex);
107                }
108                finally {
109                    // release lock;
110                    if (lock != null) {
111                        lock.release();
112                        LOG.info("Released lock for [{0}]", StatusTransitService.class.getName());
113                    }
114                }
115            }
116    
117            /**
118             * Aggregate bundle actions' status to bundle jobs
119             *
120             * @throws JPAExecutorException thrown if failed in db updates or retrievals
121             * @throws CommandException thrown if failed to run commands
122             */
123            private void bundleTransit() throws JPAExecutorException, CommandException {
124                List<BundleJobBean> pendingJobCheckList = null;
125                List<BundleJobBean> runningJobCheckList = null;
126                List<List<BundleJobBean>> bundleLists = new ArrayList<List<BundleJobBean>>();
127                if (lastInstanceStartTime == null) {
128                    LOG.info("Running bundle status service first instance");
129                    // this is the first instance, we need to check for all pending jobs;
130                    pendingJobCheckList = jpaService.execute(new BundleJobsGetPendingJPAExecutor(limit));
131                    runningJobCheckList = jpaService.execute(new BundleJobsGetRunningJPAExecutor(limit));
132                    bundleLists.add(pendingJobCheckList);
133                    bundleLists.add(runningJobCheckList);
134                }
135                else {
136                    LOG.info("Running bundle status service from last instance time =  "
137                            + DateUtils.convertDateToString(lastInstanceStartTime));
138                    // this is not the first instance, we should only check jobs that have actions been
139                    // updated >= start time of last service run;
140                    List<BundleActionBean> actionList = jpaService
141                            .execute(new BundleActionsGetByLastModifiedTimeJPAExecutor(lastInstanceStartTime));
142                    Set<String> bundleIds = new HashSet<String>();
143                    for (BundleActionBean action : actionList) {
144                        bundleIds.add(action.getBundleId());
145                    }
146                    pendingJobCheckList = new ArrayList<BundleJobBean>();
147                    for (String bundleId : bundleIds.toArray(new String[bundleIds.size()])) {
148                        BundleJobBean bundle = jpaService.execute(new BundleJobGetJPAExecutor(bundleId));
149                        // Running bundle job might have pending false
150                        if (bundle.isPending() || bundle.getStatus().equals(Job.Status.RUNNING)) {
151                            pendingJobCheckList.add(bundle);
152                        }
153                    }
154                    runningJobCheckList = pendingJobCheckList;
155                    bundleLists.add(pendingJobCheckList);
156                }
157                aggregateBundleJobsStatus(bundleLists);
158            }
159    
160            private void aggregateBundleJobsStatus(List<List<BundleJobBean>> bundleLists) throws JPAExecutorException,
161                    CommandException {
162                if (bundleLists != null) {
163                    for (List<BundleJobBean> listBundleBean : bundleLists) {
164                        for (BundleJobBean bundleJob : listBundleBean) {
165                            try {
166                                String jobId = bundleJob.getId();
167                                Job.Status[] bundleStatus = new Job.Status[1];
168                                bundleStatus[0] = bundleJob.getStatus();
169                                List<BundleActionBean> bundleActions = jpaService.execute(new BundleActionsGetJPAExecutor(
170                                        jobId));
171                                HashMap<Job.Status, Integer> bundleActionStatus = new HashMap<Job.Status, Integer>();
172                                boolean foundPending = false;
173                                for (BundleActionBean bAction : bundleActions) {
174                                    if (!bAction.isPending()) {
175                                        int counter = 0;
176                                        if (bundleActionStatus.containsKey(bAction.getStatus())) {
177                                            counter = bundleActionStatus.get(bAction.getStatus()) + 1;
178                                        }
179                                        else {
180                                            ++counter;
181                                        }
182                                        bundleActionStatus.put(bAction.getStatus(), counter);
183                                        if (bAction.getCoordId() == null
184                                                && (bAction.getStatus() == Job.Status.FAILED || bAction.getStatus() == Job.Status.KILLED)) {
185                                            (new BundleKillXCommand(jobId)).call();
186                                            LOG.info("Bundle job ["+ jobId
187                                                            + "] has been killed since one of its coordinator job failed submission.");
188                                        }
189                                    }
190                                    else {
191                                        foundPending = true;
192                                        break;
193                                    }
194                                }
195    
196                                if (foundPending) {
197                                    continue;
198                                }
199    
200                                if (checkTerminalStatus(bundleActionStatus, bundleActions, bundleStatus)) {
201                                    LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString()
202                                            + "' from '" + bundleJob.getStatus() + "'");
203                                    updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
204                                }
205                                else if (checkPrepStatus(bundleActionStatus, bundleActions, bundleStatus)) {
206                                    LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString()
207                                            + "' from '" + bundleJob.getStatus() + "'");
208                                    updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
209                                }
210                                else if (checkPausedStatus(bundleActionStatus, bundleActions, bundleStatus)) {
211                                    LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString()
212                                            + "' from '" + bundleJob.getStatus() + "'");
213                                    updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
214                                }
215                                else if (checkSuspendStatus(bundleActionStatus, bundleActions, bundleStatus)) {
216                                    LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString()
217                                            + "' from '" + bundleJob.getStatus() + "'");
218                                    updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
219                                }
220                                else if (checkRunningStatus(bundleActionStatus, bundleActions, bundleStatus)) {
221                                    LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString()
222                                            + "' from '" + bundleJob.getStatus() + "'");
223                                    updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]);
224                                }
225                            }
226                            catch (Exception ex) {
227                                LOG.error("Exception happened during aggregate bundle job's status, job = "
228                                        + bundleJob.getId(), ex);
229                            }
230                        }
231                    }
232                }
233            }
234    
235            private void aggregateCoordJobsStatus(List<CoordinatorJobBean> CoordList) throws JPAExecutorException,
236                    CommandException {
237                if (CoordList != null) {
238                    Configuration conf = Services.get().getConf();
239                    boolean backwardSupportForCoordStatus = conf.getBoolean(CONF_BACKWARD_SUPPORT_FOR_COORD_STATUS, false);
240    
241                    for (CoordinatorJobBean coordJob : CoordList) {
242                        try {
243                            // if namespace 0.1 is used and backward support is true, then ignore this coord job
244                            if (backwardSupportForCoordStatus == true && coordJob.getAppNamespace() != null
245                                    && coordJob.getAppNamespace().equals(SchemaService.COORDINATOR_NAMESPACE_URI_1)) {
246                                continue;
247                            }
248                            String jobId = coordJob.getId();
249                            Job.Status[] coordStatus = new Job.Status[1];
250                            coordStatus[0] = coordJob.getStatus();
251                            List<CoordinatorActionBean> coordActions = jpaService
252                                    .execute(new CoordJobGetActionsJPAExecutor(jobId));
253                            HashMap<CoordinatorAction.Status, Integer> coordActionStatus = new HashMap<CoordinatorAction.Status, Integer>();
254                            boolean foundPending = false;
255                            for (CoordinatorActionBean cAction : coordActions) {
256                                if (!cAction.isPending()) {
257                                    int counter = 0;
258                                    if (coordActionStatus.containsKey(cAction.getStatus())) {
259                                        counter = coordActionStatus.get(cAction.getStatus()) + 1;
260                                    }
261                                    else {
262                                        ++counter;
263                                    }
264                                    coordActionStatus.put(cAction.getStatus(), counter);
265                                }
266                                else {
267                                    foundPending = true;
268                                    break;
269                                }
270                            }
271    
272                            if (foundPending) {
273                                continue;
274                            }
275    
276                            if (coordJob.isDoneMaterialization()
277                                    && checkCoordTerminalStatus(coordActionStatus, coordActions, coordStatus)) {
278                                LOG.info("Set coordinator job [" + jobId + "] status to '" + coordStatus[0].toString()
279                                        + "' from '" + coordJob.getStatus() + "'");
280                                updateCoordJob(coordActionStatus, coordActions, coordJob, coordStatus[0]);
281                            }
282                            else if (coordJob.isDoneMaterialization()
283                                    && checkCoordSuspendStatus(coordActionStatus, coordActions, coordStatus)) {
284                                LOG.info("Set coordinator job [" + jobId + "] status to " + coordStatus[0].toString()
285                                        + "' from '" + coordJob.getStatus() + "'");
286                                updateCoordJob(coordActionStatus, coordActions, coordJob, coordStatus[0]);
287                            }
288                            else if (checkCoordRunningStatus(coordActionStatus, coordActions, coordStatus)) {
289                                LOG.info("Set coordinator job [" + jobId + "] status to " + coordStatus[0].toString()
290                                        + "' from '" + coordJob.getStatus() + "'");
291                                updateCoordJob(coordActionStatus, coordActions, coordJob, coordStatus[0]);
292                            }
293                            // checking pending flag for job when user killed or suspended the job
294                            else {
295                                checkCoordPending(coordActionStatus, coordActions, coordJob, true);
296                            }
297                        }
298                        catch (Exception ex) {
299                            LOG.error("Exception happened during aggregate coordinator job's status, job = "
300                                    + coordJob.getId(), ex);
301                        }
302                    }
303    
304                }
305            }
306    
307            private boolean checkTerminalStatus(HashMap<Job.Status, Integer> bundleActionStatus,
308                    List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) {
309                boolean ret = false;
310                int totalValuesSucceed = 0;
311                if (bundleActionStatus.containsKey(Job.Status.SUCCEEDED)) {
312                    totalValuesSucceed = bundleActionStatus.get(Job.Status.SUCCEEDED);
313                }
314                int totalValuesFailed = 0;
315                if (bundleActionStatus.containsKey(Job.Status.FAILED)) {
316                    totalValuesFailed = bundleActionStatus.get(Job.Status.FAILED);
317                }
318                int totalValuesKilled = 0;
319                if (bundleActionStatus.containsKey(Job.Status.KILLED)) {
320                    totalValuesKilled = bundleActionStatus.get(Job.Status.KILLED);
321                }
322    
323                int totalValuesDoneWithError = 0;
324                if (bundleActionStatus.containsKey(Job.Status.DONEWITHERROR)) {
325                    totalValuesDoneWithError = bundleActionStatus.get(Job.Status.DONEWITHERROR);
326                }
327    
328                if (bundleActions.size() == (totalValuesSucceed + totalValuesFailed + totalValuesKilled + totalValuesDoneWithError)) {
329                    // If all the bundle actions are succeeded then bundle job should be succeeded.
330                    if (bundleActions.size() == totalValuesSucceed) {
331                        bundleStatus[0] = Job.Status.SUCCEEDED;
332                        ret = true;
333                    }
334                    else if (bundleActions.size() == totalValuesKilled) {
335                        // If all the bundle actions are KILLED then bundle job should be KILLED.
336                        bundleStatus[0] = Job.Status.KILLED;
337                        ret = true;
338                    }
339                    else if (bundleActions.size() == totalValuesFailed) {
340                        // If all the bundle actions are FAILED then bundle job should be FAILED.
341                        bundleStatus[0] = Job.Status.FAILED;
342                        ret = true;
343                    }
344                    else {
345                        bundleStatus[0] = Job.Status.DONEWITHERROR;
346                        ret = true;
347                    }
348                }
349                return ret;
350            }
351    
352            private boolean checkCoordTerminalStatus(HashMap<CoordinatorAction.Status, Integer> coordActionStatus,
353                    List<CoordinatorActionBean> coordActions, Job.Status[] coordStatus) {
354                boolean ret = false;
355                int totalValuesSucceed = 0;
356                if (coordActionStatus.containsKey(CoordinatorAction.Status.SUCCEEDED)) {
357                    totalValuesSucceed = coordActionStatus.get(CoordinatorAction.Status.SUCCEEDED);
358                }
359                int totalValuesFailed = 0;
360                if (coordActionStatus.containsKey(CoordinatorAction.Status.FAILED)) {
361                    totalValuesFailed = coordActionStatus.get(CoordinatorAction.Status.FAILED);
362                }
363                int totalValuesKilled = 0;
364                if (coordActionStatus.containsKey(CoordinatorAction.Status.KILLED)) {
365                    totalValuesKilled = coordActionStatus.get(CoordinatorAction.Status.KILLED);
366                }
367    
368                int totalValuesTimeOut = 0;
369                if (coordActionStatus.containsKey(CoordinatorAction.Status.TIMEDOUT)) {
370                    totalValuesTimeOut = coordActionStatus.get(CoordinatorAction.Status.TIMEDOUT);
371                }
372    
373                if (coordActions.size() == (totalValuesSucceed + totalValuesFailed + totalValuesKilled + totalValuesTimeOut)) {
374                    // If all the coordinator actions are succeeded then coordinator job should be succeeded.
375                    if (coordActions.size() == totalValuesSucceed) {
376                        coordStatus[0] = Job.Status.SUCCEEDED;
377                        ret = true;
378                    }
379                    else if (coordActions.size() == totalValuesKilled) {
380                        // If all the coordinator actions are KILLED then coordinator job should be KILLED.
381                        coordStatus[0] = Job.Status.KILLED;
382                        ret = true;
383                    }
384                    else if (coordActions.size() == totalValuesFailed) {
385                        // If all the coordinator actions are FAILED then coordinator job should be FAILED.
386                        coordStatus[0] = Job.Status.FAILED;
387                        ret = true;
388                    }
389                    else {
390                        coordStatus[0] = Job.Status.DONEWITHERROR;
391                        ret = true;
392                    }
393                }
394                return ret;
395            }
396    
397            private boolean checkPrepStatus(HashMap<Job.Status, Integer> bundleActionStatus,
398                    List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) {
399                boolean ret = false;
400                if (bundleActionStatus.containsKey(Job.Status.PREP)) {
401                    // If all the bundle actions are PREP then bundle job should be RUNNING.
402                    if (bundleActions.size() > bundleActionStatus.get(Job.Status.PREP)) {
403                        bundleStatus[0] = Job.Status.RUNNING;
404                        ret = true;
405                    }
406                }
407                return ret;
408            }
409    
410            private boolean checkPausedStatus(HashMap<Job.Status, Integer> bundleActionStatus,
411                    List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) {
412                boolean ret = false;
413                if (bundleActionStatus.containsKey(Job.Status.PAUSED)) {
414                    if (bundleActions.size() == bundleActionStatus.get(Job.Status.PAUSED)) {
415                        bundleStatus[0] = Job.Status.PAUSED;
416                        ret = true;
417                    }
418                    else if (bundleActionStatus.containsKey(Job.Status.PAUSEDWITHERROR)
419                            && (bundleActions.size() == bundleActionStatus.get(Job.Status.PAUSED)
420                                    + bundleActionStatus.get(Job.Status.PAUSEDWITHERROR))) {
421                        // bundleStatus = Job.Status.PAUSEDWITHERROR;
422                        // We need to change this to PAUSEDWITHERROR in future when we add this to coordinator
423                        bundleStatus[0] = Job.Status.PAUSED;
424                        ret = true;
425                    }
426                }
427                return ret;
428            }
429    
430            private boolean checkSuspendStatus(HashMap<Job.Status, Integer> bundleActionStatus,
431                    List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) {
432                boolean ret = false;
433                if (bundleActionStatus.containsKey(Job.Status.SUSPENDED)) {
434                    if (bundleActions.size() == bundleActionStatus.get(Job.Status.SUSPENDED)) {
435                        bundleStatus[0] = Job.Status.SUSPENDED;
436                        ret = true;
437                    }
438                    else if (bundleActionStatus.containsKey(Job.Status.SUSPENDEDWITHERROR)
439                            && (bundleActions.size() == bundleActionStatus.get(Job.Status.SUSPENDED)
440                                    + bundleActionStatus.get(Job.Status.SUSPENDEDWITHERROR))) {
441                        // bundleStatus = Job.Status.SUSPENDEDWITHERROR;
442                        // We need to change this to SUSPENDEDWITHERROR in future when we add this to coordinator
443                        bundleStatus[0] = Job.Status.SUSPENDED;
444                        ret = true;
445                    }
446                }
447                return ret;
448            }
449    
450            private boolean checkCoordSuspendStatus(HashMap<CoordinatorAction.Status, Integer> coordActionStatus,
451                    List<CoordinatorActionBean> coordActions, Job.Status[] coordStatus) {
452                boolean ret = false;
453                if (coordActionStatus.containsKey(CoordinatorAction.Status.SUSPENDED)) {
454                    if (coordActions.size() == coordActionStatus.get(CoordinatorAction.Status.SUSPENDED)) {
455                        coordStatus[0] = Job.Status.SUSPENDED;
456                        ret = true;
457                    }
458                }
459                return ret;
460            }
461    
462            private boolean checkCoordRunningStatus(HashMap<CoordinatorAction.Status, Integer> coordActionStatus,
463                    List<CoordinatorActionBean> coordActions, Job.Status[] coordStatus) {
464                boolean ret = false;
465                if (coordActionStatus.containsKey(CoordinatorAction.Status.RUNNING)) {
466                    // If all the bundle actions are succeeded then bundle job should be succeeded.
467                    if (coordActions.size() == coordActionStatus.get(CoordinatorAction.Status.RUNNING)) {
468                        coordStatus[0] = Job.Status.RUNNING;
469                        ret = true;
470                    }
471                    else if (coordActionStatus.get(CoordinatorAction.Status.RUNNING) > 0) {
472                        if ((coordActionStatus.containsKey(CoordinatorAction.Status.FAILED) && coordActionStatus.get(CoordinatorAction.Status.FAILED) > 0)
473                                || (coordActionStatus.containsKey(CoordinatorAction.Status.KILLED) && coordActionStatus
474                                        .get(CoordinatorAction.Status.KILLED) > 0)
475                                || (coordActionStatus.containsKey(CoordinatorAction.Status.TIMEDOUT) && coordActionStatus
476                                        .get(CoordinatorAction.Status.TIMEDOUT) > 0)) {
477                            // coordStatus = Job.Status.RUNNINGWITHERROR;
478                            // We need to change this to RUNNINGWIHERROR in future when we add this to coordinator
479                            coordStatus[0] = Job.Status.RUNNING;
480                            ret = true;
481                        }
482                    }
483                }
484                return ret;
485            }
486    
487            private boolean checkRunningStatus(HashMap<Job.Status, Integer> bundleActionStatus,
488                    List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) {
489                boolean ret = false;
490                if (bundleActionStatus.containsKey(Job.Status.RUNNING)) {
491                    // If all the bundle actions are succeeded then bundle job should be succeeded.
492                    if (bundleActions.size() == bundleActionStatus.get(Job.Status.RUNNING)) {
493                        bundleStatus[0] = Job.Status.RUNNING;
494                        ret = true;
495                    }
496                    else if (bundleActionStatus.get(Job.Status.RUNNING) > 0) {
497                        if ((bundleActionStatus.containsKey(Job.Status.FAILED) && bundleActionStatus.get(Job.Status.FAILED) > 0)
498                                || (bundleActionStatus.containsKey(Job.Status.KILLED) && bundleActionStatus
499                                        .get(Job.Status.KILLED) > 0)
500                                || (bundleActionStatus.containsKey(Job.Status.DONEWITHERROR) && bundleActionStatus
501                                        .get(Job.Status.DONEWITHERROR) > 0)
502                                || (bundleActionStatus.containsKey(Job.Status.RUNNINGWITHERROR) && bundleActionStatus
503                                        .get(Job.Status.RUNNINGWITHERROR) > 0)) {
504                            // bundleStatus = Job.Status.RUNNINGWITHERROR;
505                            // We need to change this to RUNNINGWIHERROR in future when we add this to coordinator
506                            bundleStatus[0] = Job.Status.RUNNING;
507                            ret = true;
508                        }
509                    }
510                }
511                return ret;
512            }
513    
514            private void updateBundleJob(HashMap<Job.Status, Integer> bundleActionStatus,
515                    List<BundleActionBean> bundleActions, BundleJobBean bundleJob, Job.Status bundleStatus)
516                    throws JPAExecutorException {
517                String jobId = bundleJob.getId();
518                boolean pendingBundleJob = bundleJob.isPending();
519                // Checking the bundle pending should be updated or not
520                int totalNonPendingActions = 0;
521                for (Job.Status js : bundleActionStatus.keySet()) {
522                    totalNonPendingActions += bundleActionStatus.get(js);
523                }
524    
525                if (totalNonPendingActions == bundleActions.size()) {
526                    pendingBundleJob = false;
527                }
528    
529                // Update the Bundle Job
530                bundleJob.setStatus(bundleStatus);
531                if (pendingBundleJob) {
532                    bundleJob.setPending();
533                    LOG.info("Bundle job [" + jobId + "] Pending set to TRUE");
534                }
535                else {
536                    bundleJob.resetPending();
537                    LOG.info("Bundle job [" + jobId + "] Pending set to FALSE");
538                }
539                jpaService.execute(new BundleJobUpdateJPAExecutor(bundleJob));
540            }
541    
542            private void updateCoordJob(HashMap<CoordinatorAction.Status, Integer> coordActionStatus,
543                    List<CoordinatorActionBean> coordActions, CoordinatorJobBean coordJob, Job.Status coordStatus)
544                    throws JPAExecutorException, CommandException {
545                Job.Status prevStatus = coordJob.getStatus();
546                // Update the Coord Job
547                if (coordJob.getStatus() == Job.Status.SUCCEEDED || coordJob.getStatus() == Job.Status.FAILED
548                        || coordJob.getStatus() == Job.Status.KILLED || coordJob.getStatus() == Job.Status.DONEWITHERROR) {
549                    if (coordStatus == Job.Status.SUSPENDED) {
550                        LOG.info("Coord Job [" + coordJob.getId()
551                                + "] status can not be updated as its already in Terminal state");
552                        return;
553                    }
554                }
555    
556                checkCoordPending(coordActionStatus, coordActions, coordJob, false);
557                coordJob.setStatus(coordStatus);
558                coordJob.setStatus(StatusUtils.getStatus(coordJob));
559                coordJob.setLastModifiedTime(new Date());
560                jpaService.execute(new CoordJobUpdateJPAExecutor(coordJob));
561                // update bundle action only when status changes in coord job
562                if (coordJob.getBundleId() != null) {
563                    if (!prevStatus.equals(coordJob.getStatus())) {
564                        BundleStatusUpdateXCommand bundleStatusUpdate = new BundleStatusUpdateXCommand(coordJob, prevStatus);
565                        bundleStatusUpdate.call();
566                    }
567                }
568            }
569    
570            private void checkCoordPending(HashMap<CoordinatorAction.Status, Integer> coordActionStatus,
571                    List<CoordinatorActionBean> coordActions, CoordinatorJobBean coordJob, boolean saveToDB) throws JPAExecutorException {
572                boolean pendingCoordJob = coordJob.isPending();
573                // Checking the coordinator pending should be updated or not
574                int totalNonPendingActions = 0;
575                for (CoordinatorAction.Status js : coordActionStatus.keySet()) {
576                    totalNonPendingActions += coordActionStatus.get(js);
577                }
578    
579                if (totalNonPendingActions == coordActions.size()) {
580                    pendingCoordJob = false;
581                }
582    
583                if (pendingCoordJob) {
584                    coordJob.setPending();
585                    LOG.info("Coord job [" + coordJob.getId() + "] Pending set to TRUE");
586                }
587                else {
588                    coordJob.resetPending();
589                    LOG.info("Coord job [" + coordJob.getId() + "] Pending set to FALSE");
590                }
591    
592                if (saveToDB) {
593                    jpaService.execute(new CoordJobUpdateJPAExecutor(coordJob));
594                }
595            }
596    
597            /**
598             * Aggregate coordinator actions' status to coordinator jobs
599             *
600             * @throws JPAExecutorException thrown if failed in db updates or retrievals
601             * @throws CommandException thrown if failed to run commands
602             */
603            private void coordTransit() throws JPAExecutorException, CommandException {
604                List<CoordinatorJobBean> pendingJobCheckList = null;
605                if (lastInstanceStartTime == null) {
606                    LOG.info("Running coordinator status service first instance");
607                    // this is the first instance, we need to check for all pending jobs;
608                    pendingJobCheckList = jpaService.execute(new CoordJobsGetPendingJPAExecutor(limit));
609                }
610                else {
611                    LOG.info("Running coordinator status service from last instance time =  "
612                            + DateUtils.convertDateToString(lastInstanceStartTime));
613                    // this is not the first instance, we should only check jobs that have actions been
614                    // updated >= start time of last service run;
615                    List<CoordinatorActionBean> actionList = jpaService
616                            .execute(new CoordActionsGetByLastModifiedTimeJPAExecutor(lastInstanceStartTime));
617                    Set<String> coordIds = new HashSet<String>();
618                    for (CoordinatorActionBean action : actionList) {
619                        coordIds.add(action.getJobId());
620                    }
621                    pendingJobCheckList = new ArrayList<CoordinatorJobBean>();
622                    for (String coordId : coordIds.toArray(new String[coordIds.size()])) {
623                        CoordinatorJobBean coordJob = jpaService.execute(new CoordJobGetJPAExecutor(coordId));
624                        // Running coord job might have pending false
625                        if (coordJob.isPending() || coordJob.getStatus().equals(Job.Status.RUNNING)) {
626                            pendingJobCheckList.add(coordJob);
627                        }
628                    }
629                }
630                aggregateCoordJobsStatus(pendingJobCheckList);
631            }
632        }
633    
634        /**
635         * Initializes the {@link StatusTransitService}.
636         *
637         * @param services services instance.
638         */
639        @Override
640        public void init(Services services) {
641            Configuration conf = services.getConf();
642            Runnable stateTransitRunnable = new StatusTransitRunnable();
643            services.get(SchedulerService.class).schedule(stateTransitRunnable, 10,
644                    conf.getInt(CONF_STATUSTRANSIT_INTERVAL, 60), SchedulerService.Unit.SEC);
645        }
646    
647        /**
648         * Destroy the StateTransit Jobs Service.
649         */
650        @Override
651        public void destroy() {
652        }
653    
654        /**
655         * Return the public interface for the purge jobs service.
656         *
657         * @return {@link StatusTransitService}.
658         */
659        @Override
660        public Class<? extends Service> getInterface() {
661            return StatusTransitService.class;
662        }
663    }