001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.coord;
016    
017    import java.io.IOException;
018    import java.io.StringReader;
019    import java.util.Calendar;
020    import java.util.Date;
021    import java.util.TimeZone;
022    
023    import org.apache.hadoop.conf.Configuration;
024    import org.apache.oozie.CoordinatorActionBean;
025    import org.apache.oozie.CoordinatorJobBean;
026    import org.apache.oozie.ErrorCode;
027    import org.apache.oozie.client.CoordinatorJob;
028    import org.apache.oozie.client.SLAEvent.SlaAppType;
029    import org.apache.oozie.command.CommandException;
030    import org.apache.oozie.coord.TimeUnit;
031    import org.apache.oozie.service.Service;
032    import org.apache.oozie.service.Services;
033    import org.apache.oozie.store.CoordinatorStore;
034    import org.apache.oozie.store.StoreException;
035    import org.apache.oozie.util.DateUtils;
036    import org.apache.oozie.util.Instrumentation;
037    import org.apache.oozie.util.XConfiguration;
038    import org.apache.oozie.util.XLog;
039    import org.apache.oozie.util.XmlUtils;
040    import org.apache.oozie.util.db.SLADbOperations;
041    import org.jdom.Element;
042    import org.jdom.JDOMException;
043    
044    public class CoordActionMaterializeCommand extends CoordinatorCommand<Void> {
045        private String jobId;
046        private Date startTime;
047        private Date endTime;
048        private int lastActionNumber = 1; // over-ride by DB value
049        private final XLog log = XLog.getLog(getClass());
050        private String user;
051        private String group;
052        /**
053         * Default timeout for catchup jobs, in minutes, after which coordinator input check will timeout
054         */
055        public static final String CONF_DEFAULT_TIMEOUT_CATCHUP = Service.CONF_PREFIX + "coord.catchup.default.timeout";
056    
057        public CoordActionMaterializeCommand(String jobId, Date startTime, Date endTime) {
058            super("coord_action_mater", "coord_action_mater", 1, XLog.STD);
059            this.jobId = jobId;
060            this.startTime = startTime;
061            this.endTime = endTime;
062        }
063    
064        @Override
065        protected Void call(CoordinatorStore store) throws StoreException, CommandException {
066            // CoordinatorJobBean job = store.getCoordinatorJob(jobId, true);
067            CoordinatorJobBean job = store.getEntityManager().find(CoordinatorJobBean.class, jobId);
068            setLogInfo(job);
069            if (job.getLastActionTime() != null && job.getLastActionTime().compareTo(endTime) >= 0) {
070                log.info("ENDED Coordinator materialization for jobId = " + jobId
071                        + " Action is *already* materialized for Materialization start time = " + startTime + " : Materialization end time = " + endTime + " Job status = " + job.getStatusStr());
072                return null;
073            }
074    
075            if (endTime.after(job.getEndTime())) {
076                log.info("ENDED Coordinator materialization for jobId = " + jobId + " Materialization end time = " + endTime
077                        + " surpasses coordinator job's end time = " + job.getEndTime() + " Job status = " + job.getStatusStr());
078                return null;
079            }
080    
081            if (job.getPauseTime() != null && !startTime.before(job.getPauseTime())) {
082                log.info("ENDED Coordinator materialization for jobId = " + jobId + " Materialization start time = " + startTime
083                        + " is after or equal to coordinator job's pause time = " + job.getPauseTime() + " Job status = " + job.getStatusStr());
084                // pausetime blocks real materialization - we change job's status back to RUNNING;
085                if (job.getStatus() == CoordinatorJob.Status.PREMATER) {
086                    job.setStatus(CoordinatorJob.Status.RUNNING);
087                }
088                store.updateCoordinatorJob(job);
089                return null;
090            }
091    
092            this.user = job.getUser();
093            this.group = job.getGroup();
094    
095            if (job.getStatus().equals(CoordinatorJobBean.Status.PREMATER)) {
096                Configuration jobConf = null;
097                log.debug("start job :" + jobId + " Materialization ");
098                try {
099                    jobConf = new XConfiguration(new StringReader(job.getConf()));
100                }
101                catch (IOException ioe) {
102                    log.warn("Configuration parse error. read from DB :" + job.getConf(), ioe);
103                    throw new CommandException(ErrorCode.E1005, ioe);
104                }
105    
106                Instrumentation.Cron cron = new Instrumentation.Cron();
107                cron.start();
108                try {
109                    materializeJobs(false, job, jobConf, store);
110                    updateJobTable(job, store);
111                }
112                catch (CommandException ex) {
113                    log.warn("Exception occurs:" + ex + " Making the job failed ");
114                    job.setStatus(CoordinatorJobBean.Status.FAILED);
115                    store.updateCoordinatorJob(job);
116                }
117                catch (Exception e) {
118                    log.error("Excepion thrown :", e);
119                    throw new CommandException(ErrorCode.E1001, e.getMessage(), e);
120                }
121                cron.stop();
122            }
123            else {
124                log.info("WARN: action is not in PREMATER state!  It's in state=" + job.getStatus());
125            }
126            return null;
127        }
128    
129        /**
130         * Create action instances starting from "start-time" to end-time" and store them into Action table.
131         *
132         * @param dryrun
133         * @param jobBean
134         * @param conf
135         * @param store
136         * @throws Exception
137         */
138        protected String materializeJobs(boolean dryrun, CoordinatorJobBean jobBean, Configuration conf,
139                                         CoordinatorStore store) throws Exception {
140            String jobXml = jobBean.getJobXml();
141            Element eJob = XmlUtils.parseXml(jobXml);
142            // TODO: always UTC?
143            TimeZone appTz = DateUtils.getTimeZone(jobBean.getTimeZone());
144            // TimeZone appTz = DateUtils.getTimeZone("UTC");
145            int frequency = jobBean.getFrequency();
146            TimeUnit freqTU = TimeUnit.valueOf(eJob.getAttributeValue("freq_timeunit"));
147            TimeUnit endOfFlag = TimeUnit.valueOf(eJob.getAttributeValue("end_of_duration"));
148            Calendar start = Calendar.getInstance(appTz);
149            start.setTime(startTime);
150            DateUtils.moveToEnd(start, endOfFlag);
151            Calendar end = Calendar.getInstance(appTz);
152            end.setTime(endTime);
153            lastActionNumber = jobBean.getLastActionNumber();
154            // DateUtils.moveToEnd(end, endOfFlag);
155            log.info("   *** materialize Actions for tz=" + appTz.getDisplayName() + ",\n start=" + start.getTime()
156                    + ", end=" + end.getTime() + "\n TimeUNIT " + freqTU.getCalendarUnit() + " Frequency :" + frequency
157                    + ":" + freqTU + " lastActionNumber " + lastActionNumber);
158            // Keep the actual start time
159            Calendar origStart = Calendar.getInstance(appTz);
160            origStart.setTime(jobBean.getStartTimestamp());
161            // Move to the End of duration, if needed.
162            DateUtils.moveToEnd(origStart, endOfFlag);
163            // Cloning the start time to be used in loop iteration
164            Calendar effStart = (Calendar) origStart.clone();
165            // Move the time when the previous action finished
166            effStart.add(freqTU.getCalendarUnit(), lastActionNumber * frequency);
167    
168            String action = null;
169            StringBuilder actionStrings = new StringBuilder();
170            Date jobPauseTime = jobBean.getPauseTime();
171            Calendar pause = null;
172            if (jobPauseTime != null) {
173                pause = Calendar.getInstance(appTz);
174                pause.setTime(DateUtils.convertDateToTimestamp(jobPauseTime));
175            }
176    
177            while (effStart.compareTo(end) < 0) {
178                if (pause != null && effStart.compareTo(pause) >= 0) {
179                    break;
180                }
181                CoordinatorActionBean actionBean = new CoordinatorActionBean();
182                lastActionNumber++;
183    
184                int timeout = jobBean.getTimeout();
185                log.debug(origStart.getTime() + " Materializing action for time=" + effStart.getTime()
186                        + ", lastactionnumber=" + lastActionNumber);
187                Date actualTime = new Date();
188                action = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(),
189                        effStart.getTime(), actualTime, lastActionNumber, conf, actionBean);
190                int catchUpTOMultiplier = 1; // This value might be could be changed in future
191                if (actionBean.getNominalTimestamp().before(jobBean.getCreatedTimestamp())) {
192                    // Catchup action
193                    timeout = catchUpTOMultiplier * timeout;
194                    // actionBean.setTimeOut(Services.get().getConf().getInt(CONF_DEFAULT_TIMEOUT_CATCHUP,
195                    // -1));
196                    log.info("Catchup timeout is :" + actionBean.getTimeOut());
197                }
198                actionBean.setTimeOut(timeout);
199    
200                if (!dryrun) {
201                    storeToDB(actionBean, action, store); // Storing to table
202                }
203                else {
204                    actionStrings.append("action for new instance");
205                    actionStrings.append(action);
206                }
207                // Restore the original start time
208                effStart = (Calendar) origStart.clone();
209                effStart.add(freqTU.getCalendarUnit(), lastActionNumber * frequency);
210            }
211    
212            endTime = new Date(effStart.getTimeInMillis());
213            if (!dryrun) {
214                return action;
215            }
216            else {
217                return actionStrings.toString();
218            }
219        }
220    
221        /**
222         * Store an Action into database table.
223         *
224         * @param actionBean
225         * @param actionXml
226         * @param store
227         * @param wantSla
228         * @throws StoreException
229         * @throws JDOMException
230         */
231        private void storeToDB(CoordinatorActionBean actionBean, String actionXml, CoordinatorStore store) throws Exception {
232            log.debug("In storeToDB() action Id " + actionBean.getId() + " Size of actionXml " + actionXml.length());
233            actionBean.setActionXml(actionXml);
234            store.insertCoordinatorAction(actionBean);
235            writeActionRegistration(actionXml, actionBean, store);
236    
237            // TODO: time 100s should be configurable
238            queueCallable(new CoordActionNotification(actionBean), 100);
239            queueCallable(new CoordActionInputCheckCommand(actionBean.getId()), 100);
240        }
241    
242        /**
243         * @param actionXml
244         * @param actionBean
245         * @param store
246         * @throws Exception
247         */
248        private void writeActionRegistration(String actionXml, CoordinatorActionBean actionBean, CoordinatorStore store)
249                throws Exception {
250            Element eAction = XmlUtils.parseXml(actionXml);
251            Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
252            SLADbOperations.writeSlaRegistrationEvent(eSla, store, actionBean.getId(), SlaAppType.COORDINATOR_ACTION, user,
253                                                      group);
254        }
255    
256        /**
257         * @param job
258         * @param store
259         * @throws StoreException
260         */
261        private void updateJobTable(CoordinatorJobBean job, CoordinatorStore store) throws StoreException {
262            // TODO: why do we need this? Isn't lastMatTime enough???
263            job.setLastActionTime(endTime);
264            job.setLastActionNumber(lastActionNumber);
265            // if the job endtime == action endtime, then set status of job to
266            // succeeded
267            // we dont need to materialize this job anymore
268            Date jobEndTime = job.getEndTime();
269            if (jobEndTime.compareTo(endTime) <= 0) {
270                job.setStatus(CoordinatorJob.Status.SUCCEEDED);
271                log.info("[" + job.getId() + "]: Update status from PREMATER to SUCCEEDED");
272            }
273            else {
274                job.setStatus(CoordinatorJob.Status.RUNNING);
275                log.info("[" + job.getId() + "]: Update status from PREMATER to RUNNING");
276            }
277            job.setNextMaterializedTime(endTime);
278            store.updateCoordinatorJob(job);
279        }
280    
281        @Override
282        protected Void execute(CoordinatorStore store) throws StoreException, CommandException {
283            log.info("STARTED CoordActionMaterializeCommand for jobId=" + jobId + ", startTime=" + startTime + ", endTime="
284                    + endTime);
285            try {
286                if (lock(jobId)) {
287                    call(store);
288                }
289                else {
290                    queueCallable(new CoordActionMaterializeCommand(jobId, startTime, endTime),
291                            LOCK_FAILURE_REQUEUE_INTERVAL);
292                    log.warn("CoordActionMaterializeCommand lock was not acquired - failed jobId=" + jobId
293                            + ". Requeing the same.");
294                }
295            }
296            catch (InterruptedException e) {
297                queueCallable(new CoordActionMaterializeCommand(jobId, startTime, endTime), LOCK_FAILURE_REQUEUE_INTERVAL);
298                log.warn("CoordActionMaterializeCommand lock acquiring failed with exception " + e.getMessage()
299                        + " for jobId=" + jobId + " Requeing the same.");
300            }
301            finally {
302                log.info(" ENDED CoordActionMaterializeCommand for jobId=" + jobId + ", startTime=" + startTime
303                        + ", endTime=" + endTime);
304            }
305            return null;
306        }
307    
308    
309    
310        /**
311         * For preliminery testing. Should be removed soon
312         *
313         * @param args
314         * @throws Exception
315         */
316        public static void main(String[] args) throws Exception {
317            new Services().init();
318            try {
319                Date startTime = DateUtils.parseDateUTC("2009-02-01T01:00Z");
320                Date endTime = DateUtils.parseDateUTC("2009-02-02T01:00Z");
321                String jobId = "0000000-091207151850551-oozie-dani-C";
322                CoordActionMaterializeCommand matCmd = new CoordActionMaterializeCommand(jobId, startTime, endTime);
323                matCmd.call();
324            }
325            finally {
326                try {
327                    Thread.sleep(60000);
328                }
329                catch (Exception ex) {
330                }
331                new Services().destroy();
332            }
333        }
334    
335    }