001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.coord;
016    
017    import java.io.IOException;
018    import java.io.StringReader;
019    import java.util.Date;
020    import java.util.List;
021    
022    import org.apache.hadoop.conf.Configuration;
023    import org.apache.hadoop.fs.Path;
024    import org.apache.oozie.CoordinatorActionBean;
025    import org.apache.oozie.CoordinatorJobBean;
026    import org.apache.oozie.ErrorCode;
027    import org.apache.oozie.client.CoordinatorAction;
028    import org.apache.oozie.client.Job;
029    import org.apache.oozie.client.OozieClient;
030    import org.apache.oozie.command.CommandException;
031    import org.apache.oozie.command.PreconditionException;
032    import org.apache.oozie.coord.CoordELEvaluator;
033    import org.apache.oozie.coord.CoordELFunctions;
034    import org.apache.oozie.executor.jpa.CoordActionGetJPAExecutor;
035    import org.apache.oozie.executor.jpa.CoordJobGetJPAExecutor;
036    import org.apache.oozie.executor.jpa.JPAExecutorException;
037    import org.apache.oozie.service.HadoopAccessorException;
038    import org.apache.oozie.service.HadoopAccessorService;
039    import org.apache.oozie.service.JPAService;
040    import org.apache.oozie.service.Services;
041    import org.apache.oozie.util.DateUtils;
042    import org.apache.oozie.util.ELEvaluator;
043    import org.apache.oozie.util.Instrumentation;
044    import org.apache.oozie.util.LogUtils;
045    import org.apache.oozie.util.ParamChecker;
046    import org.apache.oozie.util.StatusUtils;
047    import org.apache.oozie.util.XConfiguration;
048    import org.apache.oozie.util.XmlUtils;
049    import org.jdom.Element;
050    
051    /**
052     * The command to check if an action's data input paths exist in the file system.
053     */
054    public class CoordActionInputCheckXCommand extends CoordinatorXCommand<Void> {
055    
056        private final String actionId;
057        private final int COMMAND_REQUEUE_INTERVAL = 60000; // 1 minute
058        private CoordinatorActionBean coordAction = null;
059        private CoordinatorJobBean coordJob = null;
060        private JPAService jpaService = null;
061    
062        public CoordActionInputCheckXCommand(String actionId) {
063            super("coord_action_input", "coord_action_input", 1);
064            this.actionId = ParamChecker.notEmpty(actionId, "actionId");
065        }
066    
067        /* (non-Javadoc)
068         * @see org.apache.oozie.command.XCommand#execute()
069         */
070        @Override
071        protected Void execute() throws CommandException {
072            LOG.info("[" + actionId + "]::ActionInputCheck:: Action is in WAITING state.");
073    
074            // this action should only get processed if current time > nominal time;
075            // otherwise, requeue this action for delay execution;
076            Date nominalTime = coordAction.getNominalTime();
077            Date currentTime = new Date();
078            if (nominalTime.compareTo(currentTime) > 0) {
079                queue(new CoordActionInputCheckXCommand(coordAction.getId()), Math.max(
080                        (nominalTime.getTime() - currentTime.getTime()), COMMAND_REQUEUE_INTERVAL));
081                // update lastModifiedTime
082                coordAction.setLastModifiedTime(new Date());
083                try {
084                    jpaService.execute(new org.apache.oozie.executor.jpa.CoordActionUpdateJPAExecutor(coordAction));
085                }
086                catch (JPAExecutorException e) {
087                    throw new CommandException(e);
088                }
089                LOG.info("[" + actionId
090                        + "]::ActionInputCheck:: nominal Time is newer than current time, so requeue and wait. Current="
091                        + currentTime + ", nominal=" + nominalTime);
092    
093                return null;
094            }
095    
096            StringBuilder actionXml = new StringBuilder(coordAction.getActionXml());
097            Instrumentation.Cron cron = new Instrumentation.Cron();
098            try {
099                Configuration actionConf = new XConfiguration(new StringReader(coordAction.getRunConf()));
100                cron.start();
101                StringBuilder existList = new StringBuilder();
102                StringBuilder nonExistList = new StringBuilder();
103                StringBuilder nonResolvedList = new StringBuilder();
104                CoordCommandUtils.getResolvedList(coordAction.getMissingDependencies(), nonExistList, nonResolvedList);
105    
106                LOG.info("[" + actionId + "]::CoordActionInputCheck:: Missing deps:" + nonExistList.toString() + " "
107                        + nonResolvedList.toString());
108                boolean status = checkInput(actionXml, existList, nonExistList, actionConf);
109                coordAction.setLastModifiedTime(currentTime);
110                coordAction.setActionXml(actionXml.toString());
111                if (nonResolvedList.length() > 0 && status == false) {
112                    nonExistList.append(CoordCommandUtils.RESOLVED_UNRESOLVED_SEPARATOR).append(nonResolvedList);
113                }
114                coordAction.setMissingDependencies(nonExistList.toString());
115                if (status == true) {
116                    coordAction.setStatus(CoordinatorAction.Status.READY);
117                    // pass jobID to the CoordActionReadyXCommand
118                    queue(new CoordActionReadyXCommand(coordAction.getJobId()), 100);
119                }
120                else {
121                    long waitingTime = (currentTime.getTime() - Math.max(coordAction.getNominalTime().getTime(), coordAction
122                            .getCreatedTime().getTime()))
123                            / (60 * 1000);
124                    int timeOut = coordAction.getTimeOut();
125                    if ((timeOut >= 0) && (waitingTime > timeOut)) {
126                        queue(new CoordActionTimeOutXCommand(coordAction), 100);
127                    }
128                    else {
129                        queue(new CoordActionInputCheckXCommand(coordAction.getId()), COMMAND_REQUEUE_INTERVAL);
130                    }
131                }
132                coordAction.setLastModifiedTime(new Date());
133                jpaService.execute(new org.apache.oozie.executor.jpa.CoordActionUpdateJPAExecutor(coordAction));
134            }
135            catch (Exception e) {
136                throw new CommandException(ErrorCode.E1021, e.getMessage(), e);
137            }
138            cron.stop();
139    
140            return null;
141        }
142    
143        /**
144         * To check the list of input paths if all of them exist
145         *
146         * @param actionXml action xml
147         * @param existList the list of existed paths
148         * @param nonExistList the list of non existed paths
149         * @param conf action configuration
150         * @return true if all input paths are existed
151         * @throws Exception thrown of unable to check input path
152         */
153        protected boolean checkInput(StringBuilder actionXml, StringBuilder existList, StringBuilder nonExistList,
154                Configuration conf) throws Exception {
155            Element eAction = XmlUtils.parseXml(actionXml.toString());
156            boolean allExist = checkResolvedUris(eAction, existList, nonExistList, conf);
157            if (allExist) {
158                LOG.debug("[" + actionId + "]::ActionInputCheck:: Checking Latest/future");
159                allExist = checkUnresolvedInstances(eAction, conf);
160            }
161            if (allExist == true) {
162                materializeDataProperties(eAction, conf);
163                actionXml.replace(0, actionXml.length(), XmlUtils.prettyPrint(eAction).toString());
164            }
165            return allExist;
166        }
167    
168        /**
169         * Materialize data properties defined in <action> tag. it includes dataIn(<DS>) and dataOut(<DS>) it creates a list
170         * of files that will be needed.
171         *
172         * @param eAction action element
173         * @param conf action configuration
174         * @throws Exception thrown if failed to resolve data properties
175         * @update modify 'Action' element with appropriate list of files.
176         */
177        @SuppressWarnings("unchecked")
178        private void materializeDataProperties(Element eAction, Configuration conf) throws Exception {
179            ELEvaluator eval = CoordELEvaluator.createDataEvaluator(eAction, conf, actionId);
180            Element configElem = eAction.getChild("action", eAction.getNamespace()).getChild("workflow",
181                    eAction.getNamespace()).getChild("configuration", eAction.getNamespace());
182            if (configElem != null) {
183                for (Element propElem : (List<Element>) configElem.getChildren("property", configElem.getNamespace())) {
184                    resolveTagContents("value", propElem, eval);
185                }
186            }
187        }
188    
189        /**
190         * To resolve property value which contains el functions
191         *
192         * @param tagName tag name
193         * @param elem the child element of "property" element
194         * @param eval el functions evaluator
195         * @throws Exception thrown if unable to resolve tag value
196         */
197        private void resolveTagContents(String tagName, Element elem, ELEvaluator eval) throws Exception {
198            if (elem == null) {
199                return;
200            }
201            Element tagElem = elem.getChild(tagName, elem.getNamespace());
202            if (tagElem != null) {
203                String updated = CoordELFunctions.evalAndWrap(eval, tagElem.getText());
204                tagElem.removeContent();
205                tagElem.addContent(updated);
206            }
207            else {
208                LOG.warn(" Value NOT FOUND " + tagName);
209            }
210        }
211    
212        /**
213         * Check if any unsolved paths under data output. Resolve the unresolved data input paths.
214         *
215         * @param eAction action element
216         * @param actionConf action configuration
217         * @return true if successful to resolve input and output paths
218         * @throws Exception thrown if failed to resolve data input and output paths
219         */
220        @SuppressWarnings("unchecked")
221        private boolean checkUnresolvedInstances(Element eAction, Configuration actionConf) throws Exception {
222            String strAction = XmlUtils.prettyPrint(eAction).toString();
223            Date nominalTime = DateUtils.parseDateUTC(eAction.getAttributeValue("action-nominal-time"));
224            String actualTimeStr = eAction.getAttributeValue("action-actual-time");
225            Date actualTime = null;
226            if (actualTimeStr == null) {
227                LOG.debug("Unable to get action-actual-time from action xml, this job is submitted " +
228                "from previous version. Assign current date to actual time, action = " + actionId);
229                actualTime = new Date();
230            } else {
231                actualTime = DateUtils.parseDateUTC(actualTimeStr);
232            }
233    
234            StringBuffer resultedXml = new StringBuffer();
235    
236            boolean ret;
237            Element inputList = eAction.getChild("input-events", eAction.getNamespace());
238            if (inputList != null) {
239                ret = materializeUnresolvedEvent(inputList.getChildren("data-in", eAction.getNamespace()), nominalTime,
240                        actualTime, actionConf);
241                if (ret == false) {
242                    resultedXml.append(strAction);
243                    return false;
244                }
245            }
246    
247            // Using latest() or future() in output-event is not intuitive.
248            // We need to make sure, this assumption is correct.
249            Element outputList = eAction.getChild("output-events", eAction.getNamespace());
250            if (outputList != null) {
251                for (Element dEvent : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) {
252                    if (dEvent.getChild("unresolved-instances", dEvent.getNamespace()) != null) {
253                        throw new CommandException(ErrorCode.E1006, "coord:latest()/future()",
254                                " not permitted in output-event ");
255                    }
256                }
257            }
258            return true;
259        }
260    
261        /**
262         * Resolve the list of data input paths
263         *
264         * @param eDataEvents the list of data input elements
265         * @param nominalTime action nominal time
266         * @param actualTime current time
267         * @param conf action configuration
268         * @return true if all unresolved URIs can be resolved
269         * @throws Exception thrown if failed to resolve data input paths
270         */
271        @SuppressWarnings("unchecked")
272        private boolean materializeUnresolvedEvent(List<Element> eDataEvents, Date nominalTime, Date actualTime,
273                Configuration conf) throws Exception {
274            for (Element dEvent : eDataEvents) {
275                if (dEvent.getChild("unresolved-instances", dEvent.getNamespace()) == null) {
276                    continue;
277                }
278                ELEvaluator eval = CoordELEvaluator.createLazyEvaluator(actualTime, nominalTime, dEvent, conf);
279                String uresolvedInstance = dEvent.getChild("unresolved-instances", dEvent.getNamespace()).getTextTrim();
280                String unresolvedList[] = uresolvedInstance.split(CoordELFunctions.INSTANCE_SEPARATOR);
281                StringBuffer resolvedTmp = new StringBuffer();
282                for (int i = 0; i < unresolvedList.length; i++) {
283                    String ret = CoordELFunctions.evalAndWrap(eval, unresolvedList[i]);
284                    Boolean isResolved = (Boolean) eval.getVariable("is_resolved");
285                    if (isResolved == false) {
286                        LOG.info("[" + actionId + "]::Cannot resolve: " + ret);
287                        return false;
288                    }
289                    if (resolvedTmp.length() > 0) {
290                        resolvedTmp.append(CoordELFunctions.INSTANCE_SEPARATOR);
291                    }
292                    resolvedTmp.append((String) eval.getVariable("resolved_path"));
293                }
294                if (resolvedTmp.length() > 0) {
295                    if (dEvent.getChild("uris", dEvent.getNamespace()) != null) {
296                        resolvedTmp.append(CoordELFunctions.INSTANCE_SEPARATOR).append(
297                                dEvent.getChild("uris", dEvent.getNamespace()).getTextTrim());
298                        dEvent.removeChild("uris", dEvent.getNamespace());
299                    }
300                    Element uriInstance = new Element("uris", dEvent.getNamespace());
301                    uriInstance.addContent(resolvedTmp.toString());
302                    dEvent.getContent().add(1, uriInstance);
303                }
304                dEvent.removeChild("unresolved-instances", dEvent.getNamespace());
305            }
306    
307            return true;
308        }
309    
310        /**
311         * Check all resolved URIs existence
312         *
313         * @param eAction action element
314         * @param existList the list of existed paths
315         * @param nonExistList the list of paths to check existence
316         * @param conf action configuration
317         * @return true if all nonExistList paths exist
318         * @throws IOException thrown if unable to access the path
319         */
320        private boolean checkResolvedUris(Element eAction, StringBuilder existList, StringBuilder nonExistList,
321                Configuration conf) throws IOException {
322            LOG.info("[" + actionId + "]::ActionInputCheck:: In checkResolvedUris...");
323            Element inputList = eAction.getChild("input-events", eAction.getNamespace());
324            if (inputList != null) {
325                if (nonExistList.length() > 0) {
326                    checkListOfPaths(existList, nonExistList, conf);
327                }
328                return nonExistList.length() == 0;
329            }
330            return true;
331        }
332    
333        /**
334         * Check a list of non existed paths and add to exist list if it exists
335         *
336         * @param existList the list of existed paths
337         * @param nonExistList the list of paths to check existence
338         * @param conf action configuration
339         * @return true if all nonExistList paths exist
340         * @throws IOException thrown if unable to access the path
341         */
342        private boolean checkListOfPaths(StringBuilder existList, StringBuilder nonExistList, Configuration conf)
343                throws IOException {
344    
345            String[] uriList = nonExistList.toString().split(CoordELFunctions.INSTANCE_SEPARATOR);
346            if (uriList[0] != null) {
347                LOG.info("[" + actionId + "]::ActionInputCheck:: In checkListOfPaths: " + uriList[0] + " is Missing.");
348            }
349    
350            nonExistList.delete(0, nonExistList.length());
351            boolean allExists = true;
352            String existSeparator = "", nonExistSeparator = "";
353            for (int i = 0; i < uriList.length; i++) {
354                if (allExists) {
355                    allExists = pathExists(uriList[i], conf);
356                    LOG.info("[" + actionId + "]::ActionInputCheck:: File:" + uriList[i] + ", Exists? :" + allExists);
357                }
358                if (allExists) {
359                    existList.append(existSeparator).append(uriList[i]);
360                    existSeparator = CoordELFunctions.INSTANCE_SEPARATOR;
361                }
362                else {
363                    nonExistList.append(nonExistSeparator).append(uriList[i]);
364                    nonExistSeparator = CoordELFunctions.INSTANCE_SEPARATOR;
365                }
366            }
367            return allExists;
368        }
369    
370        /**
371         * Check if given path exists
372         *
373         * @param sPath uri path
374         * @param actionConf action configuration
375         * @return true if path exists
376         * @throws IOException thrown if unable to access the path
377         */
378        private boolean pathExists(String sPath, Configuration actionConf) throws IOException {
379            LOG.debug("checking for the file " + sPath);
380            Path path = new Path(sPath);
381            String user = ParamChecker.notEmpty(actionConf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
382            String group = ParamChecker.notEmpty(actionConf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME);
383            try {
384                return Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, path.toUri(),
385                        new Configuration()).exists(path);
386            }
387            catch (HadoopAccessorException e) {
388                throw new IOException(e);
389            }
390        }
391    
392        /**
393         * The function create a list of URIs separated by "," using the instances time stamp and URI-template
394         *
395         * @param event : <data-in> event
396         * @param instances : List of time stamp seprated by ","
397         * @param unresolvedInstances : list of instance with latest/future function
398         * @return : list of URIs separated by ",".
399         * @throws Exception thrown if failed to create URIs from unresolvedInstances
400         */
401        @SuppressWarnings("unused")
402        private String createURIs(Element event, String instances, StringBuilder unresolvedInstances) throws Exception {
403            if (instances == null || instances.length() == 0) {
404                return "";
405            }
406            String[] instanceList = instances.split(CoordELFunctions.INSTANCE_SEPARATOR);
407            StringBuilder uris = new StringBuilder();
408    
409            for (int i = 0; i < instanceList.length; i++) {
410                int funcType = CoordCommandUtils.getFuncType(instanceList[i]);
411                if (funcType == CoordCommandUtils.LATEST || funcType == CoordCommandUtils.FUTURE) {
412                    if (unresolvedInstances.length() > 0) {
413                        unresolvedInstances.append(CoordELFunctions.INSTANCE_SEPARATOR);
414                    }
415                    unresolvedInstances.append(instanceList[i]);
416                    continue;
417                }
418                ELEvaluator eval = CoordELEvaluator.createURIELEvaluator(instanceList[i]);
419                if (uris.length() > 0) {
420                    uris.append(CoordELFunctions.INSTANCE_SEPARATOR);
421                }
422                uris.append(CoordELFunctions.evalAndWrap(eval, event.getChild("dataset", event.getNamespace()).getChild(
423                        "uri-template", event.getNamespace()).getTextTrim()));
424            }
425            return uris.toString();
426        }
427    
428        /* (non-Javadoc)
429         * @see org.apache.oozie.command.XCommand#getEntityKey()
430         */
431        @Override
432        protected String getEntityKey() {
433            return coordAction.getJobId();
434        }
435    
436        /* (non-Javadoc)
437         * @see org.apache.oozie.command.XCommand#isLockRequired()
438         */
439        @Override
440        protected boolean isLockRequired() {
441            return true;
442        }
443    
444        /* (non-Javadoc)
445         * @see org.apache.oozie.command.XCommand#eagerLoadState()
446         */
447        @Override
448        protected void eagerLoadState() throws CommandException {
449            loadState();
450        }
451    
452        /* (non-Javadoc)
453         * @see org.apache.oozie.command.XCommand#loadState()
454         */
455        @Override
456        protected void loadState() throws CommandException {
457            if (jpaService == null) {
458                jpaService = Services.get().get(JPAService.class);
459            }
460            try {
461                coordAction = jpaService.execute(new CoordActionGetJPAExecutor(actionId));
462                coordJob = jpaService.execute(new CoordJobGetJPAExecutor(coordAction.getJobId()));
463            }
464            catch (JPAExecutorException je) {
465                throw new CommandException(je);
466            }
467            LogUtils.setLogInfo(coordAction, logInfo);
468        }
469    
470        /* (non-Javadoc)
471         * @see org.apache.oozie.command.XCommand#verifyPrecondition()
472         */
473        @Override
474        protected void verifyPrecondition() throws CommandException, PreconditionException {
475            if (coordAction.getStatus() != CoordinatorActionBean.Status.WAITING) {
476                throw new PreconditionException(ErrorCode.E1100, "[" + actionId
477                        + "]::CoordActionInputCheck:: Ignoring action. Should be in WAITING state, but state="
478                        + coordAction.getStatus());
479            }
480    
481            // if eligible to do action input check when running with backward support is true
482            if (StatusUtils.getStatusForCoordActionInputCheck(coordJob)) {
483                return;
484            }
485    
486            if (coordJob.getStatus() != Job.Status.RUNNING && coordJob.getStatus() != Job.Status.PAUSED
487                    && coordJob.getStatus() != Job.Status.PAUSEDWITHERROR) {
488                throw new PreconditionException(
489                        ErrorCode.E1100, "["+ actionId + "]::CoordActionInputCheck:: Ignoring action." +
490                                    " Coordinator job is not in RUNNING/PAUSED/PAUSEDWITHERROR state, but state="
491                                + coordJob.getStatus());
492            }
493        }
494    
495        /* (non-Javadoc)
496         * @see org.apache.oozie.command.XCommand#getKey()
497         */
498        @Override
499        public String getKey(){
500            return getName() + "_" + actionId;
501        }
502    
503    }