001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.command.coord; 016 017 import org.apache.hadoop.conf.Configuration; 018 019 import org.apache.oozie.client.CoordinatorAction; 020 import org.apache.oozie.client.OozieClient; 021 import org.apache.oozie.CoordinatorActionBean; 022 import org.apache.oozie.DagEngine; 023 import org.apache.oozie.DagEngineException; 024 import org.apache.oozie.ErrorCode; 025 import org.apache.oozie.WorkflowJobBean; 026 import org.apache.oozie.command.CommandException; 027 import org.apache.oozie.service.DagEngineService; 028 import org.apache.oozie.service.WorkflowStoreService; 029 import org.apache.oozie.store.StoreException; 030 import org.apache.oozie.store.CoordinatorStore; 031 import org.apache.oozie.store.WorkflowStore; 032 import org.apache.oozie.service.Services; 033 import org.apache.oozie.util.JobUtils; 034 import org.apache.oozie.util.ParamChecker; 035 import org.apache.oozie.util.XLog; 036 import org.apache.oozie.util.XmlUtils; 037 import org.apache.oozie.util.XConfiguration; 038 import org.apache.oozie.util.db.SLADbOperations; 039 import org.apache.oozie.client.SLAEvent.SlaAppType; 040 import org.apache.oozie.client.SLAEvent.Status; 041 042 import org.jdom.Element; 043 import org.jdom.JDOMException; 044 045 import java.io.IOException; 046 import java.io.StringReader; 047 048 public class CoordActionStartCommand extends CoordinatorCommand<Void> { 049 050 public static final String EL_ERROR = "EL_ERROR"; 051 public static final String EL_EVAL_ERROR = "EL_EVAL_ERROR"; 052 public static final String COULD_NOT_START = "COULD_NOT_START"; 053 public static final String START_DATA_MISSING = "START_DATA_MISSING"; 054 public static final String EXEC_DATA_MISSING = "EXEC_DATA_MISSING"; 055 056 private final XLog log = XLog.getLog(getClass()); 057 private String actionId = null; 058 private String user = null; 059 private String authToken = null; 060 private CoordinatorActionBean coordAction = null; 061 062 public CoordActionStartCommand(String id, String user, String token) { 063 super("coord_action_start", "coord_action_start", 1, XLog.OPS); 064 this.actionId = ParamChecker.notEmpty(id, "id"); 065 this.user = ParamChecker.notEmpty(user, "user"); 066 this.authToken = ParamChecker.notEmpty(token, "token"); 067 } 068 069 /** 070 * Create config to pass to WF Engine 1. Get createdConf from coord_actions table 2. Get actionXml from 071 * coord_actions table. Extract all 'property' tags and merge createdConf (overwrite duplicate keys). 3. Extract 072 * 'app-path' from actionXML. Create a new property called 'oozie.wf.application.path' and merge with createdConf 073 * (overwrite duplicate keys) 4. Read contents of config-default.xml in workflow directory. 5. Merge createdConf 074 * with config-default.xml (overwrite duplicate keys). 6. Results is runConf which is saved in coord_actions table. 075 * Merge Action createdConf with actionXml to create new runConf with replaced variables 076 * 077 * @param action CoordinatorActionBean 078 * @return Configuration 079 * @throws CommandException 080 */ 081 private Configuration mergeConfig(CoordinatorActionBean action) throws CommandException { 082 String createdConf = action.getCreatedConf(); 083 String actionXml = action.getActionXml(); 084 Element workflowProperties = null; 085 try { 086 workflowProperties = XmlUtils.parseXml(actionXml); 087 } 088 catch (JDOMException e1) { 089 log.warn("Configuration parse error in:" + actionXml); 090 throw new CommandException(ErrorCode.E1005, e1.getMessage(), e1); 091 } 092 // generate the 'runConf' for this action 093 // Step 1: runConf = createdConf 094 Configuration runConf = null; 095 try { 096 runConf = new XConfiguration(new StringReader(createdConf)); 097 } 098 catch (IOException e1) { 099 log.warn("Configuration parse error in:" + createdConf); 100 throw new CommandException(ErrorCode.E1005, e1.getMessage(), e1); 101 } 102 // Step 2: Merge local properties into runConf 103 // extract 'property' tags under 'configuration' block in the 104 // coordinator.xml (saved in actionxml column) 105 // convert Element to XConfiguration 106 Element configElement = (Element) workflowProperties.getChild("action", workflowProperties.getNamespace()) 107 .getChild("workflow", workflowProperties.getNamespace()).getChild("configuration", 108 workflowProperties.getNamespace()); 109 if (configElement != null) { 110 String strConfig = XmlUtils.prettyPrint(configElement).toString(); 111 Configuration localConf; 112 try { 113 localConf = new XConfiguration(new StringReader(strConfig)); 114 } 115 catch (IOException e1) { 116 log.warn("Configuration parse error in:" + strConfig); 117 throw new CommandException(ErrorCode.E1005, e1.getMessage(), e1); 118 } 119 120 // copy configuration properties in coordinator.xml to the runConf 121 XConfiguration.copy(localConf, runConf); 122 } 123 124 // Step 3: Extract value of 'app-path' in actionxml, and save it as a 125 // new property called 'oozie.wf.application.path' 126 // WF Engine requires the path to the workflow.xml to be saved under 127 // this property name 128 String appPath = workflowProperties.getChild("action", workflowProperties.getNamespace()).getChild("workflow", 129 workflowProperties.getNamespace()).getChild("app-path", workflowProperties.getNamespace()).getValue(); 130 runConf.set("oozie.wf.application.path", appPath); 131 return runConf; 132 } 133 134 @Override 135 protected Void call(CoordinatorStore store) throws StoreException, CommandException { 136 boolean makeFail = true; 137 String errCode = ""; 138 String errMsg = ""; 139 ParamChecker.notEmpty(user, "user"); 140 ParamChecker.notEmpty(authToken, "authToken"); 141 142 // CoordinatorActionBean coordAction = store.getCoordinatorAction(id, true); 143 log.debug("actionid=" + actionId + ", status=" + coordAction.getStatus()); 144 if (coordAction.getStatus() == CoordinatorAction.Status.SUBMITTED) { 145 // log.debug("getting.. job id: " + coordAction.getJobId()); 146 // create merged runConf to pass to WF Engine 147 Configuration runConf = mergeConfig(coordAction); 148 coordAction.setRunConf(XmlUtils.prettyPrint(runConf).toString()); 149 // log.debug("%%% merged runconf=" + XmlUtils.prettyPrint(runConf).toString()); 150 DagEngine dagEngine = Services.get().get(DagEngineService.class).getDagEngine(user, authToken); 151 try { 152 boolean startJob = true; 153 Configuration conf = new XConfiguration(new StringReader(coordAction.getRunConf())); 154 SLADbOperations.writeStausEvent(coordAction.getSlaXml(), coordAction.getId(), store, Status.STARTED, 155 SlaAppType.COORDINATOR_ACTION); 156 157 // Normalize workflow appPath here; 158 JobUtils.normalizeAppPath(conf.get(OozieClient.USER_NAME), conf.get(OozieClient.GROUP_NAME), conf); 159 160 String wfId = dagEngine.submitJob(conf, startJob); 161 coordAction.setStatus(CoordinatorAction.Status.RUNNING); 162 coordAction.setExternalId(wfId); 163 store.updateCoordinatorAction(coordAction); 164 165 log.debug("Updating WF record for WFID :" + wfId + " with parent id: " + actionId); 166 WorkflowStore wfStore = Services.get().get(WorkflowStoreService.class).create(store); 167 WorkflowJobBean wfJob = wfStore.getWorkflow(wfId, false); 168 wfJob.setParentId(actionId); 169 wfStore.updateWorkflow(wfJob); 170 171 makeFail = false; 172 } 173 catch (StoreException se) { 174 makeFail = false; 175 throw se; 176 } 177 catch (DagEngineException dee) { 178 errMsg = dee.getMessage(); 179 errCode = "E1005"; 180 log.warn("can not create DagEngine for submitting jobs", dee); 181 } 182 catch (CommandException ce) { 183 errMsg = ce.getMessage(); 184 errCode = ce.getErrorCode().toString(); 185 log.warn("command exception occured ", ce); 186 } 187 catch (java.io.IOException ioe) { 188 errMsg = ioe.getMessage(); 189 errCode = "E1005"; 190 log.warn("Configuration parse error. read from DB :" + coordAction.getRunConf(), ioe); 191 } 192 catch (Exception ex) { 193 errMsg = ex.getMessage(); 194 errCode = "E1005"; 195 log.warn("can not create DagEngine for submitting jobs", ex); 196 } 197 finally { 198 if (makeFail == true) { // No DB exception occurs 199 log.warn("Failing the action " + coordAction.getId() + ". Because " + errCode + " : " + errMsg); 200 coordAction.setStatus(CoordinatorAction.Status.FAILED); 201 if (errMsg.length() > 254) { // Because table column size is 255 202 errMsg = errMsg.substring(0, 255); 203 } 204 coordAction.setErrorMessage(errMsg); 205 coordAction.setErrorCode(errCode); 206 store.updateCoordinatorAction(coordAction); 207 queueCallable(new CoordActionReadyCommand(coordAction.getJobId())); 208 } 209 } 210 } 211 return null; 212 } 213 214 @Override 215 protected Void execute(CoordinatorStore store) throws StoreException, CommandException { 216 log.info("STARTED CoordActionStartCommand actionId=" + actionId); 217 try { 218 coordAction = store.getEntityManager().find(CoordinatorActionBean.class, actionId); 219 setLogInfo(coordAction); 220 if (lock(coordAction.getJobId())) { 221 call(store); 222 } 223 else { 224 queueCallable(new CoordActionStartCommand(actionId, user, authToken), LOCK_FAILURE_REQUEUE_INTERVAL); 225 log.warn("CoordActionStartCommand lock was not acquired - failed jobId=" + coordAction.getJobId() 226 + ", actionId=" + actionId + ". Requeing the same."); 227 } 228 } 229 catch (InterruptedException e) { 230 queueCallable(new CoordActionStartCommand(actionId, user, authToken), LOCK_FAILURE_REQUEUE_INTERVAL); 231 log.warn("CoordActionStartCommand lock acquiring failed with exception " + e.getMessage() + " for jobId=" 232 + coordAction.getJobId() + ", actionId=" + actionId + " Requeing the same."); 233 } 234 finally { 235 log.info("ENDED CoordActionStartCommand actionId=" + actionId); 236 } 237 return null; 238 } 239 }