001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.command.coord; 016 017 import java.io.IOException; 018 import java.io.StringReader; 019 import java.util.Date; 020 import java.util.List; 021 022 import org.apache.hadoop.conf.Configuration; 023 import org.apache.hadoop.fs.Path; 024 import org.apache.oozie.CoordinatorActionBean; 025 import org.apache.oozie.CoordinatorJobBean; 026 import org.apache.oozie.ErrorCode; 027 import org.apache.oozie.client.CoordinatorAction; 028 import org.apache.oozie.client.Job; 029 import org.apache.oozie.client.OozieClient; 030 import org.apache.oozie.command.CommandException; 031 import org.apache.oozie.command.PreconditionException; 032 import org.apache.oozie.coord.CoordELEvaluator; 033 import org.apache.oozie.coord.CoordELFunctions; 034 import org.apache.oozie.executor.jpa.CoordActionGetJPAExecutor; 035 import org.apache.oozie.executor.jpa.CoordJobGetJPAExecutor; 036 import org.apache.oozie.executor.jpa.JPAExecutorException; 037 import org.apache.oozie.service.HadoopAccessorException; 038 import org.apache.oozie.service.HadoopAccessorService; 039 import org.apache.oozie.service.JPAService; 040 import org.apache.oozie.service.Services; 041 import org.apache.oozie.util.DateUtils; 042 import org.apache.oozie.util.ELEvaluator; 043 import org.apache.oozie.util.Instrumentation; 044 import org.apache.oozie.util.LogUtils; 045 import org.apache.oozie.util.ParamChecker; 046 import org.apache.oozie.util.StatusUtils; 047 import org.apache.oozie.util.XConfiguration; 048 import org.apache.oozie.util.XmlUtils; 049 import org.jdom.Element; 050 051 /** 052 * The command to check if an action's data input paths exist in the file system. 053 */ 054 public class CoordActionInputCheckXCommand extends CoordinatorXCommand<Void> { 055 056 private final String actionId; 057 private final int COMMAND_REQUEUE_INTERVAL = 60000; // 1 minute 058 private CoordinatorActionBean coordAction = null; 059 private CoordinatorJobBean coordJob = null; 060 private JPAService jpaService = null; 061 062 public CoordActionInputCheckXCommand(String actionId) { 063 super("coord_action_input", "coord_action_input", 1); 064 this.actionId = ParamChecker.notEmpty(actionId, "actionId"); 065 } 066 067 /* (non-Javadoc) 068 * @see org.apache.oozie.command.XCommand#execute() 069 */ 070 @Override 071 protected Void execute() throws CommandException { 072 LOG.info("[" + actionId + "]::ActionInputCheck:: Action is in WAITING state."); 073 074 // this action should only get processed if current time > nominal time; 075 // otherwise, requeue this action for delay execution; 076 Date nominalTime = coordAction.getNominalTime(); 077 Date currentTime = new Date(); 078 if (nominalTime.compareTo(currentTime) > 0) { 079 queue(new CoordActionInputCheckXCommand(coordAction.getId()), Math.max( 080 (nominalTime.getTime() - currentTime.getTime()), COMMAND_REQUEUE_INTERVAL)); 081 // update lastModifiedTime 082 coordAction.setLastModifiedTime(new Date()); 083 try { 084 jpaService.execute(new org.apache.oozie.executor.jpa.CoordActionUpdateJPAExecutor(coordAction)); 085 } 086 catch (JPAExecutorException e) { 087 throw new CommandException(e); 088 } 089 LOG.info("[" + actionId 090 + "]::ActionInputCheck:: nominal Time is newer than current time, so requeue and wait. Current=" 091 + currentTime + ", nominal=" + nominalTime); 092 093 return null; 094 } 095 096 StringBuilder actionXml = new StringBuilder(coordAction.getActionXml()); 097 Instrumentation.Cron cron = new Instrumentation.Cron(); 098 try { 099 Configuration actionConf = new XConfiguration(new StringReader(coordAction.getRunConf())); 100 cron.start(); 101 StringBuilder existList = new StringBuilder(); 102 StringBuilder nonExistList = new StringBuilder(); 103 StringBuilder nonResolvedList = new StringBuilder(); 104 CoordCommandUtils.getResolvedList(coordAction.getMissingDependencies(), nonExistList, nonResolvedList); 105 106 LOG.info("[" + actionId + "]::CoordActionInputCheck:: Missing deps:" + nonExistList.toString() + " " 107 + nonResolvedList.toString()); 108 boolean status = checkInput(actionXml, existList, nonExistList, actionConf); 109 coordAction.setLastModifiedTime(currentTime); 110 coordAction.setActionXml(actionXml.toString()); 111 if (nonResolvedList.length() > 0 && status == false) { 112 nonExistList.append(CoordCommandUtils.RESOLVED_UNRESOLVED_SEPARATOR).append(nonResolvedList); 113 } 114 coordAction.setMissingDependencies(nonExistList.toString()); 115 if (status == true) { 116 coordAction.setStatus(CoordinatorAction.Status.READY); 117 // pass jobID to the CoordActionReadyXCommand 118 queue(new CoordActionReadyXCommand(coordAction.getJobId()), 100); 119 } 120 else { 121 long waitingTime = (currentTime.getTime() - Math.max(coordAction.getNominalTime().getTime(), coordAction 122 .getCreatedTime().getTime())) 123 / (60 * 1000); 124 int timeOut = coordAction.getTimeOut(); 125 if ((timeOut >= 0) && (waitingTime > timeOut)) { 126 queue(new CoordActionTimeOutXCommand(coordAction), 100); 127 } 128 else { 129 queue(new CoordActionInputCheckXCommand(coordAction.getId()), COMMAND_REQUEUE_INTERVAL); 130 } 131 } 132 coordAction.setLastModifiedTime(new Date()); 133 jpaService.execute(new org.apache.oozie.executor.jpa.CoordActionUpdateJPAExecutor(coordAction)); 134 } 135 catch (Exception e) { 136 throw new CommandException(ErrorCode.E1021, e.getMessage(), e); 137 } 138 cron.stop(); 139 140 return null; 141 } 142 143 /** 144 * To check the list of input paths if all of them exist 145 * 146 * @param actionXml action xml 147 * @param existList the list of existed paths 148 * @param nonExistList the list of non existed paths 149 * @param conf action configuration 150 * @return true if all input paths are existed 151 * @throws Exception thrown of unable to check input path 152 */ 153 protected boolean checkInput(StringBuilder actionXml, StringBuilder existList, StringBuilder nonExistList, 154 Configuration conf) throws Exception { 155 Element eAction = XmlUtils.parseXml(actionXml.toString()); 156 boolean allExist = checkResolvedUris(eAction, existList, nonExistList, conf); 157 if (allExist) { 158 LOG.debug("[" + actionId + "]::ActionInputCheck:: Checking Latest/future"); 159 allExist = checkUnresolvedInstances(eAction, conf); 160 } 161 if (allExist == true) { 162 materializeDataProperties(eAction, conf); 163 actionXml.replace(0, actionXml.length(), XmlUtils.prettyPrint(eAction).toString()); 164 } 165 return allExist; 166 } 167 168 /** 169 * Materialize data properties defined in <action> tag. it includes dataIn(<DS>) and dataOut(<DS>) it creates a list 170 * of files that will be needed. 171 * 172 * @param eAction action element 173 * @param conf action configuration 174 * @throws Exception thrown if failed to resolve data properties 175 * @update modify 'Action' element with appropriate list of files. 176 */ 177 @SuppressWarnings("unchecked") 178 private void materializeDataProperties(Element eAction, Configuration conf) throws Exception { 179 ELEvaluator eval = CoordELEvaluator.createDataEvaluator(eAction, conf, actionId); 180 Element configElem = eAction.getChild("action", eAction.getNamespace()).getChild("workflow", 181 eAction.getNamespace()).getChild("configuration", eAction.getNamespace()); 182 if (configElem != null) { 183 for (Element propElem : (List<Element>) configElem.getChildren("property", configElem.getNamespace())) { 184 resolveTagContents("value", propElem, eval); 185 } 186 } 187 } 188 189 /** 190 * To resolve property value which contains el functions 191 * 192 * @param tagName tag name 193 * @param elem the child element of "property" element 194 * @param eval el functions evaluator 195 * @throws Exception thrown if unable to resolve tag value 196 */ 197 private void resolveTagContents(String tagName, Element elem, ELEvaluator eval) throws Exception { 198 if (elem == null) { 199 return; 200 } 201 Element tagElem = elem.getChild(tagName, elem.getNamespace()); 202 if (tagElem != null) { 203 String updated = CoordELFunctions.evalAndWrap(eval, tagElem.getText()); 204 tagElem.removeContent(); 205 tagElem.addContent(updated); 206 } 207 else { 208 LOG.warn(" Value NOT FOUND " + tagName); 209 } 210 } 211 212 /** 213 * Check if any unsolved paths under data output. Resolve the unresolved data input paths. 214 * 215 * @param eAction action element 216 * @param actionConf action configuration 217 * @return true if successful to resolve input and output paths 218 * @throws Exception thrown if failed to resolve data input and output paths 219 */ 220 @SuppressWarnings("unchecked") 221 private boolean checkUnresolvedInstances(Element eAction, Configuration actionConf) throws Exception { 222 String strAction = XmlUtils.prettyPrint(eAction).toString(); 223 Date nominalTime = DateUtils.parseDateUTC(eAction.getAttributeValue("action-nominal-time")); 224 String actualTimeStr = eAction.getAttributeValue("action-actual-time"); 225 Date actualTime = null; 226 if (actualTimeStr == null) { 227 LOG.debug("Unable to get action-actual-time from action xml, this job is submitted " + 228 "from previous version. Assign current date to actual time, action = " + actionId); 229 actualTime = new Date(); 230 } else { 231 actualTime = DateUtils.parseDateUTC(actualTimeStr); 232 } 233 234 StringBuffer resultedXml = new StringBuffer(); 235 236 boolean ret; 237 Element inputList = eAction.getChild("input-events", eAction.getNamespace()); 238 if (inputList != null) { 239 ret = materializeUnresolvedEvent(inputList.getChildren("data-in", eAction.getNamespace()), nominalTime, 240 actualTime, actionConf); 241 if (ret == false) { 242 resultedXml.append(strAction); 243 return false; 244 } 245 } 246 247 // Using latest() or future() in output-event is not intuitive. 248 // We need to make sure, this assumption is correct. 249 Element outputList = eAction.getChild("output-events", eAction.getNamespace()); 250 if (outputList != null) { 251 for (Element dEvent : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) { 252 if (dEvent.getChild("unresolved-instances", dEvent.getNamespace()) != null) { 253 throw new CommandException(ErrorCode.E1006, "coord:latest()/future()", 254 " not permitted in output-event "); 255 } 256 } 257 } 258 return true; 259 } 260 261 /** 262 * Resolve the list of data input paths 263 * 264 * @param eDataEvents the list of data input elements 265 * @param nominalTime action nominal time 266 * @param actualTime current time 267 * @param conf action configuration 268 * @return true if all unresolved URIs can be resolved 269 * @throws Exception thrown if failed to resolve data input paths 270 */ 271 @SuppressWarnings("unchecked") 272 private boolean materializeUnresolvedEvent(List<Element> eDataEvents, Date nominalTime, Date actualTime, 273 Configuration conf) throws Exception { 274 for (Element dEvent : eDataEvents) { 275 if (dEvent.getChild("unresolved-instances", dEvent.getNamespace()) == null) { 276 continue; 277 } 278 ELEvaluator eval = CoordELEvaluator.createLazyEvaluator(actualTime, nominalTime, dEvent, conf); 279 String uresolvedInstance = dEvent.getChild("unresolved-instances", dEvent.getNamespace()).getTextTrim(); 280 String unresolvedList[] = uresolvedInstance.split(CoordELFunctions.INSTANCE_SEPARATOR); 281 StringBuffer resolvedTmp = new StringBuffer(); 282 for (int i = 0; i < unresolvedList.length; i++) { 283 String ret = CoordELFunctions.evalAndWrap(eval, unresolvedList[i]); 284 Boolean isResolved = (Boolean) eval.getVariable("is_resolved"); 285 if (isResolved == false) { 286 LOG.info("[" + actionId + "]::Cannot resolve: " + ret); 287 return false; 288 } 289 if (resolvedTmp.length() > 0) { 290 resolvedTmp.append(CoordELFunctions.INSTANCE_SEPARATOR); 291 } 292 resolvedTmp.append((String) eval.getVariable("resolved_path")); 293 } 294 if (resolvedTmp.length() > 0) { 295 if (dEvent.getChild("uris", dEvent.getNamespace()) != null) { 296 resolvedTmp.append(CoordELFunctions.INSTANCE_SEPARATOR).append( 297 dEvent.getChild("uris", dEvent.getNamespace()).getTextTrim()); 298 dEvent.removeChild("uris", dEvent.getNamespace()); 299 } 300 Element uriInstance = new Element("uris", dEvent.getNamespace()); 301 uriInstance.addContent(resolvedTmp.toString()); 302 dEvent.getContent().add(1, uriInstance); 303 } 304 dEvent.removeChild("unresolved-instances", dEvent.getNamespace()); 305 } 306 307 return true; 308 } 309 310 /** 311 * Check all resolved URIs existence 312 * 313 * @param eAction action element 314 * @param existList the list of existed paths 315 * @param nonExistList the list of paths to check existence 316 * @param conf action configuration 317 * @return true if all nonExistList paths exist 318 * @throws IOException thrown if unable to access the path 319 */ 320 private boolean checkResolvedUris(Element eAction, StringBuilder existList, StringBuilder nonExistList, 321 Configuration conf) throws IOException { 322 LOG.info("[" + actionId + "]::ActionInputCheck:: In checkResolvedUris..."); 323 Element inputList = eAction.getChild("input-events", eAction.getNamespace()); 324 if (inputList != null) { 325 if (nonExistList.length() > 0) { 326 checkListOfPaths(existList, nonExistList, conf); 327 } 328 return nonExistList.length() == 0; 329 } 330 return true; 331 } 332 333 /** 334 * Check a list of non existed paths and add to exist list if it exists 335 * 336 * @param existList the list of existed paths 337 * @param nonExistList the list of paths to check existence 338 * @param conf action configuration 339 * @return true if all nonExistList paths exist 340 * @throws IOException thrown if unable to access the path 341 */ 342 private boolean checkListOfPaths(StringBuilder existList, StringBuilder nonExistList, Configuration conf) 343 throws IOException { 344 345 String[] uriList = nonExistList.toString().split(CoordELFunctions.INSTANCE_SEPARATOR); 346 if (uriList[0] != null) { 347 LOG.info("[" + actionId + "]::ActionInputCheck:: In checkListOfPaths: " + uriList[0] + " is Missing."); 348 } 349 350 nonExistList.delete(0, nonExistList.length()); 351 boolean allExists = true; 352 String existSeparator = "", nonExistSeparator = ""; 353 for (int i = 0; i < uriList.length; i++) { 354 if (allExists) { 355 allExists = pathExists(uriList[i], conf); 356 LOG.info("[" + actionId + "]::ActionInputCheck:: File:" + uriList[i] + ", Exists? :" + allExists); 357 } 358 if (allExists) { 359 existList.append(existSeparator).append(uriList[i]); 360 existSeparator = CoordELFunctions.INSTANCE_SEPARATOR; 361 } 362 else { 363 nonExistList.append(nonExistSeparator).append(uriList[i]); 364 nonExistSeparator = CoordELFunctions.INSTANCE_SEPARATOR; 365 } 366 } 367 return allExists; 368 } 369 370 /** 371 * Check if given path exists 372 * 373 * @param sPath uri path 374 * @param actionConf action configuration 375 * @return true if path exists 376 * @throws IOException thrown if unable to access the path 377 */ 378 private boolean pathExists(String sPath, Configuration actionConf) throws IOException { 379 LOG.debug("checking for the file " + sPath); 380 Path path = new Path(sPath); 381 String user = ParamChecker.notEmpty(actionConf.get(OozieClient.USER_NAME), OozieClient.USER_NAME); 382 String group = ParamChecker.notEmpty(actionConf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME); 383 try { 384 return Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, path.toUri(), 385 new Configuration()).exists(path); 386 } 387 catch (HadoopAccessorException e) { 388 throw new IOException(e); 389 } 390 } 391 392 /** 393 * The function create a list of URIs separated by "," using the instances time stamp and URI-template 394 * 395 * @param event : <data-in> event 396 * @param instances : List of time stamp seprated by "," 397 * @param unresolvedInstances : list of instance with latest/future function 398 * @return : list of URIs separated by ",". 399 * @throws Exception thrown if failed to create URIs from unresolvedInstances 400 */ 401 @SuppressWarnings("unused") 402 private String createURIs(Element event, String instances, StringBuilder unresolvedInstances) throws Exception { 403 if (instances == null || instances.length() == 0) { 404 return ""; 405 } 406 String[] instanceList = instances.split(CoordELFunctions.INSTANCE_SEPARATOR); 407 StringBuilder uris = new StringBuilder(); 408 409 for (int i = 0; i < instanceList.length; i++) { 410 int funcType = CoordCommandUtils.getFuncType(instanceList[i]); 411 if (funcType == CoordCommandUtils.LATEST || funcType == CoordCommandUtils.FUTURE) { 412 if (unresolvedInstances.length() > 0) { 413 unresolvedInstances.append(CoordELFunctions.INSTANCE_SEPARATOR); 414 } 415 unresolvedInstances.append(instanceList[i]); 416 continue; 417 } 418 ELEvaluator eval = CoordELEvaluator.createURIELEvaluator(instanceList[i]); 419 if (uris.length() > 0) { 420 uris.append(CoordELFunctions.INSTANCE_SEPARATOR); 421 } 422 uris.append(CoordELFunctions.evalAndWrap(eval, event.getChild("dataset", event.getNamespace()).getChild( 423 "uri-template", event.getNamespace()).getTextTrim())); 424 } 425 return uris.toString(); 426 } 427 428 /* (non-Javadoc) 429 * @see org.apache.oozie.command.XCommand#getEntityKey() 430 */ 431 @Override 432 protected String getEntityKey() { 433 return coordAction.getJobId(); 434 } 435 436 /* (non-Javadoc) 437 * @see org.apache.oozie.command.XCommand#isLockRequired() 438 */ 439 @Override 440 protected boolean isLockRequired() { 441 return true; 442 } 443 444 /* (non-Javadoc) 445 * @see org.apache.oozie.command.XCommand#eagerLoadState() 446 */ 447 @Override 448 protected void eagerLoadState() throws CommandException { 449 loadState(); 450 } 451 452 /* (non-Javadoc) 453 * @see org.apache.oozie.command.XCommand#loadState() 454 */ 455 @Override 456 protected void loadState() throws CommandException { 457 if (jpaService == null) { 458 jpaService = Services.get().get(JPAService.class); 459 } 460 try { 461 coordAction = jpaService.execute(new CoordActionGetJPAExecutor(actionId)); 462 coordJob = jpaService.execute(new CoordJobGetJPAExecutor(coordAction.getJobId())); 463 } 464 catch (JPAExecutorException je) { 465 throw new CommandException(je); 466 } 467 LogUtils.setLogInfo(coordAction, logInfo); 468 } 469 470 /* (non-Javadoc) 471 * @see org.apache.oozie.command.XCommand#verifyPrecondition() 472 */ 473 @Override 474 protected void verifyPrecondition() throws CommandException, PreconditionException { 475 if (coordAction.getStatus() != CoordinatorActionBean.Status.WAITING) { 476 throw new PreconditionException(ErrorCode.E1100, "[" + actionId 477 + "]::CoordActionInputCheck:: Ignoring action. Should be in WAITING state, but state=" 478 + coordAction.getStatus()); 479 } 480 481 // if eligible to do action input check when running with backward support is true 482 if (StatusUtils.getStatusForCoordActionInputCheck(coordJob)) { 483 return; 484 } 485 486 if (coordJob.getStatus() != Job.Status.RUNNING && coordJob.getStatus() != Job.Status.PAUSED 487 && coordJob.getStatus() != Job.Status.PAUSEDWITHERROR) { 488 throw new PreconditionException( 489 ErrorCode.E1100, "["+ actionId + "]::CoordActionInputCheck:: Ignoring action." + 490 " Coordinator job is not in RUNNING/PAUSED/PAUSEDWITHERROR state, but state=" 491 + coordJob.getStatus()); 492 } 493 } 494 495 /* (non-Javadoc) 496 * @see org.apache.oozie.command.XCommand#getKey() 497 */ 498 @Override 499 public String getKey(){ 500 return getName() + "_" + actionId; 501 } 502 503 }