001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.service; 016 017 import java.util.ArrayList; 018 import java.util.Date; 019 import java.util.HashMap; 020 import java.util.HashSet; 021 import java.util.List; 022 import java.util.Set; 023 024 import org.apache.hadoop.conf.Configuration; 025 import org.apache.oozie.BundleActionBean; 026 import org.apache.oozie.BundleJobBean; 027 import org.apache.oozie.CoordinatorActionBean; 028 import org.apache.oozie.CoordinatorJobBean; 029 import org.apache.oozie.client.CoordinatorAction; 030 import org.apache.oozie.client.Job; 031 import org.apache.oozie.command.CommandException; 032 import org.apache.oozie.command.bundle.BundleKillXCommand; 033 import org.apache.oozie.command.bundle.BundleStatusUpdateXCommand; 034 import org.apache.oozie.executor.jpa.BundleActionsGetByLastModifiedTimeJPAExecutor; 035 import org.apache.oozie.executor.jpa.BundleActionsGetJPAExecutor; 036 import org.apache.oozie.executor.jpa.BundleJobGetJPAExecutor; 037 import org.apache.oozie.executor.jpa.BundleJobUpdateJPAExecutor; 038 import org.apache.oozie.executor.jpa.BundleJobsGetPendingJPAExecutor; 039 import org.apache.oozie.executor.jpa.BundleJobsGetRunningJPAExecutor; 040 import org.apache.oozie.executor.jpa.CoordActionsGetByLastModifiedTimeJPAExecutor; 041 import org.apache.oozie.executor.jpa.CoordJobGetActionsJPAExecutor; 042 import org.apache.oozie.executor.jpa.CoordJobGetJPAExecutor; 043 import org.apache.oozie.executor.jpa.CoordJobUpdateJPAExecutor; 044 import org.apache.oozie.executor.jpa.CoordJobsGetPendingJPAExecutor; 045 import org.apache.oozie.executor.jpa.JPAExecutorException; 046 import org.apache.oozie.util.DateUtils; 047 import org.apache.oozie.util.MemoryLocks; 048 import org.apache.oozie.util.StatusUtils; 049 import org.apache.oozie.util.XLog; 050 051 /** 052 * StateTransitService is scheduled to run at the configured interval. 053 * <p/> 054 * It is to update job's status according to its child actions' status. If all child actions' pending flag equals 0 (job 055 * done), we reset the job's pending flag to 0. If all child actions are succeeded, we set the job's status to 056 * SUCCEEDED. 057 */ 058 public class StatusTransitService implements Service { 059 public static final String CONF_PREFIX = Service.CONF_PREFIX + "StatusTransitService."; 060 public static final String CONF_STATUSTRANSIT_INTERVAL = CONF_PREFIX + "statusTransit.interval"; 061 public static final String CONF_BACKWARD_SUPPORT_FOR_COORD_STATUS = CONF_PREFIX + "backward.support.for.coord.status"; 062 private static int limit = -1; 063 private static Date lastInstanceStartTime = null; 064 private final static XLog LOG = XLog.getLog(StatusTransitRunnable.class); 065 066 /** 067 * StateTransitRunnable is the runnable which is scheduled to run at the configured interval. 068 * <p/> 069 * It is to update job's status according to its child actions' status. If all child actions' pending flag equals 0 070 * (job done), we reset the job's pending flag to 0. If all child actions are succeeded, we set the job's status to 071 * SUCCEEDED. 072 */ 073 static class StatusTransitRunnable implements Runnable { 074 private JPAService jpaService = null; 075 private MemoryLocks.LockToken lock; 076 077 public StatusTransitRunnable() { 078 jpaService = Services.get().get(JPAService.class); 079 if (jpaService == null) { 080 LOG.error("Missing JPAService"); 081 } 082 } 083 084 public void run() { 085 try { 086 Date curDate = new Date(); // records the start time of this service run; 087 088 // first check if there is some other instance running; 089 lock = Services.get().get(MemoryLocksService.class).getWriteLock(StatusTransitService.class.getName(), 090 lockTimeout); 091 if (lock == null) { 092 LOG.info("This StatusTransitService instance" 093 + " will not run since there is already an instance running"); 094 } 095 else { 096 LOG.info("Acquired lock for [{0}]", StatusTransitService.class.getName()); 097 // running coord jobs transit service 098 coordTransit(); 099 // running bundle jobs transit service 100 bundleTransit(); 101 102 lastInstanceStartTime = curDate; 103 } 104 } 105 catch (Exception ex) { 106 LOG.warn("Exception happened during StatusTransitRunnable ", ex); 107 } 108 finally { 109 // release lock; 110 if (lock != null) { 111 lock.release(); 112 LOG.info("Released lock for [{0}]", StatusTransitService.class.getName()); 113 } 114 } 115 } 116 117 /** 118 * Aggregate bundle actions' status to bundle jobs 119 * 120 * @throws JPAExecutorException thrown if failed in db updates or retrievals 121 * @throws CommandException thrown if failed to run commands 122 */ 123 private void bundleTransit() throws JPAExecutorException, CommandException { 124 List<BundleJobBean> pendingJobCheckList = null; 125 List<BundleJobBean> runningJobCheckList = null; 126 List<List<BundleJobBean>> bundleLists = new ArrayList<List<BundleJobBean>>(); 127 if (lastInstanceStartTime == null) { 128 LOG.info("Running bundle status service first instance"); 129 // this is the first instance, we need to check for all pending jobs; 130 pendingJobCheckList = jpaService.execute(new BundleJobsGetPendingJPAExecutor(limit)); 131 runningJobCheckList = jpaService.execute(new BundleJobsGetRunningJPAExecutor(limit)); 132 bundleLists.add(pendingJobCheckList); 133 bundleLists.add(runningJobCheckList); 134 } 135 else { 136 LOG.info("Running bundle status service from last instance time = " 137 + DateUtils.convertDateToString(lastInstanceStartTime)); 138 // this is not the first instance, we should only check jobs that have actions been 139 // updated >= start time of last service run; 140 List<BundleActionBean> actionList = jpaService 141 .execute(new BundleActionsGetByLastModifiedTimeJPAExecutor(lastInstanceStartTime)); 142 Set<String> bundleIds = new HashSet<String>(); 143 for (BundleActionBean action : actionList) { 144 bundleIds.add(action.getBundleId()); 145 } 146 pendingJobCheckList = new ArrayList<BundleJobBean>(); 147 for (String bundleId : bundleIds.toArray(new String[bundleIds.size()])) { 148 BundleJobBean bundle = jpaService.execute(new BundleJobGetJPAExecutor(bundleId)); 149 // Running bundle job might have pending false 150 if (bundle.isPending() || bundle.getStatus().equals(Job.Status.RUNNING)) { 151 pendingJobCheckList.add(bundle); 152 } 153 } 154 runningJobCheckList = pendingJobCheckList; 155 bundleLists.add(pendingJobCheckList); 156 } 157 aggregateBundleJobsStatus(bundleLists); 158 } 159 160 private void aggregateBundleJobsStatus(List<List<BundleJobBean>> bundleLists) throws JPAExecutorException, 161 CommandException { 162 if (bundleLists != null) { 163 for (List<BundleJobBean> listBundleBean : bundleLists) { 164 for (BundleJobBean bundleJob : listBundleBean) { 165 try { 166 String jobId = bundleJob.getId(); 167 Job.Status[] bundleStatus = new Job.Status[1]; 168 bundleStatus[0] = bundleJob.getStatus(); 169 List<BundleActionBean> bundleActions = jpaService.execute(new BundleActionsGetJPAExecutor( 170 jobId)); 171 HashMap<Job.Status, Integer> bundleActionStatus = new HashMap<Job.Status, Integer>(); 172 boolean foundPending = false; 173 for (BundleActionBean bAction : bundleActions) { 174 if (!bAction.isPending()) { 175 int counter = 0; 176 if (bundleActionStatus.containsKey(bAction.getStatus())) { 177 counter = bundleActionStatus.get(bAction.getStatus()) + 1; 178 } 179 else { 180 ++counter; 181 } 182 bundleActionStatus.put(bAction.getStatus(), counter); 183 if (bAction.getCoordId() == null 184 && (bAction.getStatus() == Job.Status.FAILED || bAction.getStatus() == Job.Status.KILLED)) { 185 (new BundleKillXCommand(jobId)).call(); 186 LOG.info("Bundle job ["+ jobId 187 + "] has been killed since one of its coordinator job failed submission."); 188 } 189 } 190 else { 191 foundPending = true; 192 break; 193 } 194 } 195 196 if (foundPending) { 197 continue; 198 } 199 200 if (checkTerminalStatus(bundleActionStatus, bundleActions, bundleStatus)) { 201 LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString() 202 + "' from '" + bundleJob.getStatus() + "'"); 203 updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]); 204 } 205 else if (checkPrepStatus(bundleActionStatus, bundleActions, bundleStatus)) { 206 LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString() 207 + "' from '" + bundleJob.getStatus() + "'"); 208 updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]); 209 } 210 else if (checkPausedStatus(bundleActionStatus, bundleActions, bundleStatus)) { 211 LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString() 212 + "' from '" + bundleJob.getStatus() + "'"); 213 updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]); 214 } 215 else if (checkSuspendStatus(bundleActionStatus, bundleActions, bundleStatus)) { 216 LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString() 217 + "' from '" + bundleJob.getStatus() + "'"); 218 updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]); 219 } 220 else if (checkRunningStatus(bundleActionStatus, bundleActions, bundleStatus)) { 221 LOG.info("Set bundle job [" + jobId + "] status to '" + bundleStatus[0].toString() 222 + "' from '" + bundleJob.getStatus() + "'"); 223 updateBundleJob(bundleActionStatus, bundleActions, bundleJob, bundleStatus[0]); 224 } 225 } 226 catch (Exception ex) { 227 LOG.error("Exception happened during aggregate bundle job's status, job = " 228 + bundleJob.getId(), ex); 229 } 230 } 231 } 232 } 233 } 234 235 private void aggregateCoordJobsStatus(List<CoordinatorJobBean> CoordList) throws JPAExecutorException, 236 CommandException { 237 if (CoordList != null) { 238 Configuration conf = Services.get().getConf(); 239 boolean backwardSupportForCoordStatus = conf.getBoolean(CONF_BACKWARD_SUPPORT_FOR_COORD_STATUS, false); 240 241 for (CoordinatorJobBean coordJob : CoordList) { 242 try { 243 // if namespace 0.1 is used and backward support is true, then ignore this coord job 244 if (backwardSupportForCoordStatus == true && coordJob.getAppNamespace() != null 245 && coordJob.getAppNamespace().equals(SchemaService.COORDINATOR_NAMESPACE_URI_1)) { 246 continue; 247 } 248 String jobId = coordJob.getId(); 249 Job.Status[] coordStatus = new Job.Status[1]; 250 coordStatus[0] = coordJob.getStatus(); 251 List<CoordinatorActionBean> coordActions = jpaService 252 .execute(new CoordJobGetActionsJPAExecutor(jobId)); 253 HashMap<CoordinatorAction.Status, Integer> coordActionStatus = new HashMap<CoordinatorAction.Status, Integer>(); 254 boolean foundPending = false; 255 for (CoordinatorActionBean cAction : coordActions) { 256 if (!cAction.isPending()) { 257 int counter = 0; 258 if (coordActionStatus.containsKey(cAction.getStatus())) { 259 counter = coordActionStatus.get(cAction.getStatus()) + 1; 260 } 261 else { 262 ++counter; 263 } 264 coordActionStatus.put(cAction.getStatus(), counter); 265 } 266 else { 267 foundPending = true; 268 break; 269 } 270 } 271 272 if (foundPending) { 273 continue; 274 } 275 276 if (coordJob.isDoneMaterialization() 277 && checkCoordTerminalStatus(coordActionStatus, coordActions, coordStatus)) { 278 LOG.info("Set coordinator job [" + jobId + "] status to '" + coordStatus[0].toString() 279 + "' from '" + coordJob.getStatus() + "'"); 280 updateCoordJob(coordActionStatus, coordActions, coordJob, coordStatus[0]); 281 } 282 else if (coordJob.isDoneMaterialization() 283 && checkCoordSuspendStatus(coordActionStatus, coordActions, coordStatus)) { 284 LOG.info("Set coordinator job [" + jobId + "] status to " + coordStatus[0].toString() 285 + "' from '" + coordJob.getStatus() + "'"); 286 updateCoordJob(coordActionStatus, coordActions, coordJob, coordStatus[0]); 287 } 288 else if (checkCoordRunningStatus(coordActionStatus, coordActions, coordStatus)) { 289 LOG.info("Set coordinator job [" + jobId + "] status to " + coordStatus[0].toString() 290 + "' from '" + coordJob.getStatus() + "'"); 291 updateCoordJob(coordActionStatus, coordActions, coordJob, coordStatus[0]); 292 } 293 // checking pending flag for job when user killed or suspended the job 294 else { 295 checkCoordPending(coordActionStatus, coordActions, coordJob, true); 296 } 297 } 298 catch (Exception ex) { 299 LOG.error("Exception happened during aggregate coordinator job's status, job = " 300 + coordJob.getId(), ex); 301 } 302 } 303 304 } 305 } 306 307 private boolean checkTerminalStatus(HashMap<Job.Status, Integer> bundleActionStatus, 308 List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) { 309 boolean ret = false; 310 int totalValuesSucceed = 0; 311 if (bundleActionStatus.containsKey(Job.Status.SUCCEEDED)) { 312 totalValuesSucceed = bundleActionStatus.get(Job.Status.SUCCEEDED); 313 } 314 int totalValuesFailed = 0; 315 if (bundleActionStatus.containsKey(Job.Status.FAILED)) { 316 totalValuesFailed = bundleActionStatus.get(Job.Status.FAILED); 317 } 318 int totalValuesKilled = 0; 319 if (bundleActionStatus.containsKey(Job.Status.KILLED)) { 320 totalValuesKilled = bundleActionStatus.get(Job.Status.KILLED); 321 } 322 323 int totalValuesDoneWithError = 0; 324 if (bundleActionStatus.containsKey(Job.Status.DONEWITHERROR)) { 325 totalValuesDoneWithError = bundleActionStatus.get(Job.Status.DONEWITHERROR); 326 } 327 328 if (bundleActions.size() == (totalValuesSucceed + totalValuesFailed + totalValuesKilled + totalValuesDoneWithError)) { 329 // If all the bundle actions are succeeded then bundle job should be succeeded. 330 if (bundleActions.size() == totalValuesSucceed) { 331 bundleStatus[0] = Job.Status.SUCCEEDED; 332 ret = true; 333 } 334 else if (bundleActions.size() == totalValuesKilled) { 335 // If all the bundle actions are KILLED then bundle job should be KILLED. 336 bundleStatus[0] = Job.Status.KILLED; 337 ret = true; 338 } 339 else if (bundleActions.size() == totalValuesFailed) { 340 // If all the bundle actions are FAILED then bundle job should be FAILED. 341 bundleStatus[0] = Job.Status.FAILED; 342 ret = true; 343 } 344 else { 345 bundleStatus[0] = Job.Status.DONEWITHERROR; 346 ret = true; 347 } 348 } 349 return ret; 350 } 351 352 private boolean checkCoordTerminalStatus(HashMap<CoordinatorAction.Status, Integer> coordActionStatus, 353 List<CoordinatorActionBean> coordActions, Job.Status[] coordStatus) { 354 boolean ret = false; 355 int totalValuesSucceed = 0; 356 if (coordActionStatus.containsKey(CoordinatorAction.Status.SUCCEEDED)) { 357 totalValuesSucceed = coordActionStatus.get(CoordinatorAction.Status.SUCCEEDED); 358 } 359 int totalValuesFailed = 0; 360 if (coordActionStatus.containsKey(CoordinatorAction.Status.FAILED)) { 361 totalValuesFailed = coordActionStatus.get(CoordinatorAction.Status.FAILED); 362 } 363 int totalValuesKilled = 0; 364 if (coordActionStatus.containsKey(CoordinatorAction.Status.KILLED)) { 365 totalValuesKilled = coordActionStatus.get(CoordinatorAction.Status.KILLED); 366 } 367 368 int totalValuesTimeOut = 0; 369 if (coordActionStatus.containsKey(CoordinatorAction.Status.TIMEDOUT)) { 370 totalValuesTimeOut = coordActionStatus.get(CoordinatorAction.Status.TIMEDOUT); 371 } 372 373 if (coordActions.size() == (totalValuesSucceed + totalValuesFailed + totalValuesKilled + totalValuesTimeOut)) { 374 // If all the coordinator actions are succeeded then coordinator job should be succeeded. 375 if (coordActions.size() == totalValuesSucceed) { 376 coordStatus[0] = Job.Status.SUCCEEDED; 377 ret = true; 378 } 379 else if (coordActions.size() == totalValuesKilled) { 380 // If all the coordinator actions are KILLED then coordinator job should be KILLED. 381 coordStatus[0] = Job.Status.KILLED; 382 ret = true; 383 } 384 else if (coordActions.size() == totalValuesFailed) { 385 // If all the coordinator actions are FAILED then coordinator job should be FAILED. 386 coordStatus[0] = Job.Status.FAILED; 387 ret = true; 388 } 389 else { 390 coordStatus[0] = Job.Status.DONEWITHERROR; 391 ret = true; 392 } 393 } 394 return ret; 395 } 396 397 private boolean checkPrepStatus(HashMap<Job.Status, Integer> bundleActionStatus, 398 List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) { 399 boolean ret = false; 400 if (bundleActionStatus.containsKey(Job.Status.PREP)) { 401 // If all the bundle actions are PREP then bundle job should be RUNNING. 402 if (bundleActions.size() > bundleActionStatus.get(Job.Status.PREP)) { 403 bundleStatus[0] = Job.Status.RUNNING; 404 ret = true; 405 } 406 } 407 return ret; 408 } 409 410 private boolean checkPausedStatus(HashMap<Job.Status, Integer> bundleActionStatus, 411 List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) { 412 boolean ret = false; 413 if (bundleActionStatus.containsKey(Job.Status.PAUSED)) { 414 if (bundleActions.size() == bundleActionStatus.get(Job.Status.PAUSED)) { 415 bundleStatus[0] = Job.Status.PAUSED; 416 ret = true; 417 } 418 else if (bundleActionStatus.containsKey(Job.Status.PAUSEDWITHERROR) 419 && (bundleActions.size() == bundleActionStatus.get(Job.Status.PAUSED) 420 + bundleActionStatus.get(Job.Status.PAUSEDWITHERROR))) { 421 // bundleStatus = Job.Status.PAUSEDWITHERROR; 422 // We need to change this to PAUSEDWITHERROR in future when we add this to coordinator 423 bundleStatus[0] = Job.Status.PAUSED; 424 ret = true; 425 } 426 } 427 return ret; 428 } 429 430 private boolean checkSuspendStatus(HashMap<Job.Status, Integer> bundleActionStatus, 431 List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) { 432 boolean ret = false; 433 if (bundleActionStatus.containsKey(Job.Status.SUSPENDED)) { 434 if (bundleActions.size() == bundleActionStatus.get(Job.Status.SUSPENDED)) { 435 bundleStatus[0] = Job.Status.SUSPENDED; 436 ret = true; 437 } 438 else if (bundleActionStatus.containsKey(Job.Status.SUSPENDEDWITHERROR) 439 && (bundleActions.size() == bundleActionStatus.get(Job.Status.SUSPENDED) 440 + bundleActionStatus.get(Job.Status.SUSPENDEDWITHERROR))) { 441 // bundleStatus = Job.Status.SUSPENDEDWITHERROR; 442 // We need to change this to SUSPENDEDWITHERROR in future when we add this to coordinator 443 bundleStatus[0] = Job.Status.SUSPENDED; 444 ret = true; 445 } 446 } 447 return ret; 448 } 449 450 private boolean checkCoordSuspendStatus(HashMap<CoordinatorAction.Status, Integer> coordActionStatus, 451 List<CoordinatorActionBean> coordActions, Job.Status[] coordStatus) { 452 boolean ret = false; 453 if (coordActionStatus.containsKey(CoordinatorAction.Status.SUSPENDED)) { 454 if (coordActions.size() == coordActionStatus.get(CoordinatorAction.Status.SUSPENDED)) { 455 coordStatus[0] = Job.Status.SUSPENDED; 456 ret = true; 457 } 458 } 459 return ret; 460 } 461 462 private boolean checkCoordRunningStatus(HashMap<CoordinatorAction.Status, Integer> coordActionStatus, 463 List<CoordinatorActionBean> coordActions, Job.Status[] coordStatus) { 464 boolean ret = false; 465 if (coordActionStatus.containsKey(CoordinatorAction.Status.RUNNING)) { 466 // If all the bundle actions are succeeded then bundle job should be succeeded. 467 if (coordActions.size() == coordActionStatus.get(CoordinatorAction.Status.RUNNING)) { 468 coordStatus[0] = Job.Status.RUNNING; 469 ret = true; 470 } 471 else if (coordActionStatus.get(CoordinatorAction.Status.RUNNING) > 0) { 472 if ((coordActionStatus.containsKey(CoordinatorAction.Status.FAILED) && coordActionStatus.get(CoordinatorAction.Status.FAILED) > 0) 473 || (coordActionStatus.containsKey(CoordinatorAction.Status.KILLED) && coordActionStatus 474 .get(CoordinatorAction.Status.KILLED) > 0) 475 || (coordActionStatus.containsKey(CoordinatorAction.Status.TIMEDOUT) && coordActionStatus 476 .get(CoordinatorAction.Status.TIMEDOUT) > 0)) { 477 // coordStatus = Job.Status.RUNNINGWITHERROR; 478 // We need to change this to RUNNINGWIHERROR in future when we add this to coordinator 479 coordStatus[0] = Job.Status.RUNNING; 480 ret = true; 481 } 482 } 483 } 484 return ret; 485 } 486 487 private boolean checkRunningStatus(HashMap<Job.Status, Integer> bundleActionStatus, 488 List<BundleActionBean> bundleActions, Job.Status[] bundleStatus) { 489 boolean ret = false; 490 if (bundleActionStatus.containsKey(Job.Status.RUNNING)) { 491 // If all the bundle actions are succeeded then bundle job should be succeeded. 492 if (bundleActions.size() == bundleActionStatus.get(Job.Status.RUNNING)) { 493 bundleStatus[0] = Job.Status.RUNNING; 494 ret = true; 495 } 496 else if (bundleActionStatus.get(Job.Status.RUNNING) > 0) { 497 if ((bundleActionStatus.containsKey(Job.Status.FAILED) && bundleActionStatus.get(Job.Status.FAILED) > 0) 498 || (bundleActionStatus.containsKey(Job.Status.KILLED) && bundleActionStatus 499 .get(Job.Status.KILLED) > 0) 500 || (bundleActionStatus.containsKey(Job.Status.DONEWITHERROR) && bundleActionStatus 501 .get(Job.Status.DONEWITHERROR) > 0) 502 || (bundleActionStatus.containsKey(Job.Status.RUNNINGWITHERROR) && bundleActionStatus 503 .get(Job.Status.RUNNINGWITHERROR) > 0)) { 504 // bundleStatus = Job.Status.RUNNINGWITHERROR; 505 // We need to change this to RUNNINGWIHERROR in future when we add this to coordinator 506 bundleStatus[0] = Job.Status.RUNNING; 507 ret = true; 508 } 509 } 510 } 511 return ret; 512 } 513 514 private void updateBundleJob(HashMap<Job.Status, Integer> bundleActionStatus, 515 List<BundleActionBean> bundleActions, BundleJobBean bundleJob, Job.Status bundleStatus) 516 throws JPAExecutorException { 517 String jobId = bundleJob.getId(); 518 boolean pendingBundleJob = bundleJob.isPending(); 519 // Checking the bundle pending should be updated or not 520 int totalNonPendingActions = 0; 521 for (Job.Status js : bundleActionStatus.keySet()) { 522 totalNonPendingActions += bundleActionStatus.get(js); 523 } 524 525 if (totalNonPendingActions == bundleActions.size()) { 526 pendingBundleJob = false; 527 } 528 529 // Update the Bundle Job 530 bundleJob.setStatus(bundleStatus); 531 if (pendingBundleJob) { 532 bundleJob.setPending(); 533 LOG.info("Bundle job [" + jobId + "] Pending set to TRUE"); 534 } 535 else { 536 bundleJob.resetPending(); 537 LOG.info("Bundle job [" + jobId + "] Pending set to FALSE"); 538 } 539 jpaService.execute(new BundleJobUpdateJPAExecutor(bundleJob)); 540 } 541 542 private void updateCoordJob(HashMap<CoordinatorAction.Status, Integer> coordActionStatus, 543 List<CoordinatorActionBean> coordActions, CoordinatorJobBean coordJob, Job.Status coordStatus) 544 throws JPAExecutorException, CommandException { 545 Job.Status prevStatus = coordJob.getStatus(); 546 // Update the Coord Job 547 if (coordJob.getStatus() == Job.Status.SUCCEEDED || coordJob.getStatus() == Job.Status.FAILED 548 || coordJob.getStatus() == Job.Status.KILLED || coordJob.getStatus() == Job.Status.DONEWITHERROR) { 549 if (coordStatus == Job.Status.SUSPENDED) { 550 LOG.info("Coord Job [" + coordJob.getId() 551 + "] status can not be updated as its already in Terminal state"); 552 return; 553 } 554 } 555 556 checkCoordPending(coordActionStatus, coordActions, coordJob, false); 557 coordJob.setStatus(coordStatus); 558 coordJob.setStatus(StatusUtils.getStatus(coordJob)); 559 coordJob.setLastModifiedTime(new Date()); 560 jpaService.execute(new CoordJobUpdateJPAExecutor(coordJob)); 561 // update bundle action only when status changes in coord job 562 if (coordJob.getBundleId() != null) { 563 if (!prevStatus.equals(coordJob.getStatus())) { 564 BundleStatusUpdateXCommand bundleStatusUpdate = new BundleStatusUpdateXCommand(coordJob, prevStatus); 565 bundleStatusUpdate.call(); 566 } 567 } 568 } 569 570 private void checkCoordPending(HashMap<CoordinatorAction.Status, Integer> coordActionStatus, 571 List<CoordinatorActionBean> coordActions, CoordinatorJobBean coordJob, boolean saveToDB) throws JPAExecutorException { 572 boolean pendingCoordJob = coordJob.isPending(); 573 // Checking the coordinator pending should be updated or not 574 int totalNonPendingActions = 0; 575 for (CoordinatorAction.Status js : coordActionStatus.keySet()) { 576 totalNonPendingActions += coordActionStatus.get(js); 577 } 578 579 if (totalNonPendingActions == coordActions.size()) { 580 pendingCoordJob = false; 581 } 582 583 if (pendingCoordJob) { 584 coordJob.setPending(); 585 LOG.info("Coord job [" + coordJob.getId() + "] Pending set to TRUE"); 586 } 587 else { 588 coordJob.resetPending(); 589 LOG.info("Coord job [" + coordJob.getId() + "] Pending set to FALSE"); 590 } 591 592 if (saveToDB) { 593 jpaService.execute(new CoordJobUpdateJPAExecutor(coordJob)); 594 } 595 } 596 597 /** 598 * Aggregate coordinator actions' status to coordinator jobs 599 * 600 * @throws JPAExecutorException thrown if failed in db updates or retrievals 601 * @throws CommandException thrown if failed to run commands 602 */ 603 private void coordTransit() throws JPAExecutorException, CommandException { 604 List<CoordinatorJobBean> pendingJobCheckList = null; 605 if (lastInstanceStartTime == null) { 606 LOG.info("Running coordinator status service first instance"); 607 // this is the first instance, we need to check for all pending jobs; 608 pendingJobCheckList = jpaService.execute(new CoordJobsGetPendingJPAExecutor(limit)); 609 } 610 else { 611 LOG.info("Running coordinator status service from last instance time = " 612 + DateUtils.convertDateToString(lastInstanceStartTime)); 613 // this is not the first instance, we should only check jobs that have actions been 614 // updated >= start time of last service run; 615 List<CoordinatorActionBean> actionList = jpaService 616 .execute(new CoordActionsGetByLastModifiedTimeJPAExecutor(lastInstanceStartTime)); 617 Set<String> coordIds = new HashSet<String>(); 618 for (CoordinatorActionBean action : actionList) { 619 coordIds.add(action.getJobId()); 620 } 621 pendingJobCheckList = new ArrayList<CoordinatorJobBean>(); 622 for (String coordId : coordIds.toArray(new String[coordIds.size()])) { 623 CoordinatorJobBean coordJob = jpaService.execute(new CoordJobGetJPAExecutor(coordId)); 624 // Running coord job might have pending false 625 if (coordJob.isPending() || coordJob.getStatus().equals(Job.Status.RUNNING)) { 626 pendingJobCheckList.add(coordJob); 627 } 628 } 629 } 630 aggregateCoordJobsStatus(pendingJobCheckList); 631 } 632 } 633 634 /** 635 * Initializes the {@link StatusTransitService}. 636 * 637 * @param services services instance. 638 */ 639 @Override 640 public void init(Services services) { 641 Configuration conf = services.getConf(); 642 Runnable stateTransitRunnable = new StatusTransitRunnable(); 643 services.get(SchedulerService.class).schedule(stateTransitRunnable, 10, 644 conf.getInt(CONF_STATUSTRANSIT_INTERVAL, 60), SchedulerService.Unit.SEC); 645 } 646 647 /** 648 * Destroy the StateTransit Jobs Service. 649 */ 650 @Override 651 public void destroy() { 652 } 653 654 /** 655 * Return the public interface for the purge jobs service. 656 * 657 * @return {@link StatusTransitService}. 658 */ 659 @Override 660 public Class<? extends Service> getInterface() { 661 return StatusTransitService.class; 662 } 663 }