001    package org.LiveGraph.dataFile.read;
002    
003    import java.io.BufferedReader;
004    import java.io.Closeable;
005    import java.io.IOException;
006    import java.io.InputStream;
007    import java.io.InputStreamReader;
008    import java.util.ArrayList;
009    import java.util.Collections;
010    import java.util.HashMap;
011    import java.util.List;
012    import java.util.Map;
013    
014    import org.LiveGraph.dataFile.common.DataFormatException;
015    
016    
017    import static org.LiveGraph.dataFile.common.DataFormatTools.*;
018    
019    
020    /**
021     * A reader for a data stream (usually, a CSV file). This reader
022     * will parse the data stream and extract the file information, the data
023     * series headings and the actual data.<br />
024     * <br />
025     * The information extracted from the data stream is passed to the application
026     * using an observer pattern: after a line was parsed, the appropriate 
027     * {@code notifyXXXX(...)}-method of this class is called with the extracted
028     * information. The {@code notifyXXXX(...)}-methods dispatch appropriate
029     * notifications to all {@link DataStreamObserver}-objects registered with this
030     * {@code DataStreamReader}-instance.<br />
 * If required, an application may also override the {@code notifyXXXX(...)}-methods
032     * to handle data read events.<br /> 
033     * <br />
034     * See {@link org.LiveGraph.dataFile.write.DataStreamWriter} for the details of the
035     * data file format.<br />
036     * <br />
037     * Note, that this class has a different role than it did in version 1.01 of the
038     * LiveGraph API. The {@code DataStreamReader} class from version 1.01 is replaced by
039     * {@link org.LiveGraph.dataCache.DataStreamToCacheReader}.
040     * 
041     * <p><strong>LiveGraph</strong> (http://www.live-graph.org).</p>
042     * <p>Copyright (c) 2007 by G. Paperin.</p>
043     * <p>File: DataStreamReader.java</p> 
044     * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or
045     *    without modification, are permitted provided that the following terms and conditions are met:
046     * </p>
047     * <p style="font-size:smaller;">1. Redistributions of source code must retain the above
048     *    acknowledgement of the LiveGraph project and its web-site, the above copyright notice,
049     *    this list of conditions and the following disclaimer.<br />
050     *    2. Redistributions in binary form must reproduce the above acknowledgement of the
051     *    LiveGraph project and its web-site, the above copyright notice, this list of conditions
052     *    and the following disclaimer in the documentation and/or other materials provided with
053     *    the distribution.<br />
054     *    3. All advertising materials mentioning features or use of this software or any derived
055     *    software must display the following acknowledgement:<br />
056     *    <em>This product includes software developed by the LiveGraph project and its
057     *    contributors.<br />(http://www.live-graph.org)</em><br />
058     *    4. All advertising materials distributed in form of HTML pages or any other technology
059     *    permitting active hyper-links that mention features or use of this software or any
060     *    derived software must display the acknowledgment specified in condition 3 of this
061     *    agreement, and in addition, include a visible and working hyper-link to the LiveGraph
062     *    homepage (http://www.live-graph.org).
063     * </p>
064     * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY
065     *    OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
066     *    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT SHALL
067     *    THE AUTHORS, CONTRIBUTORS OR COPYRIGHT  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
068     *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING  FROM, OUT OF OR
069     *    IN CONNECTION WITH THE SOFTWARE OR THE USE OR  OTHER DEALINGS IN THE SOFTWARE.
070     * </p>
071     * 
072     * @author Greg Paperin (http://www.paperin.org)
073     * @version {@value org.LiveGraph.LiveGraph#version}
074     * @see DataStreamObserver
075     * @see DataStreamObserverAdapter
076     * @see org.LiveGraph.dataCache.DataStreamToCacheReader
077     */
078    public class DataStreamReader implements Closeable {
079    
080    /**
081     * Data stream reader.
082     */
083    private BufferedReader in = null;
084    
085    /**
086     * Data values separator.
087     */
088    private String separator = DefaultSeparator;
089    
090    /**
091     * Whether the data values separator was already finalised. 
092     */
093    private boolean separatorSet = false;
094    
095    /**
096     * Whether the data series headings are already set-up.
097     */
098    private boolean labelsSet = false;
099    
100    /**
101     * The data stream index of the next data record.
102     */
103    private int nextDatasetFileIndex = -1;
104    
105    /**
106     * Observers who want to know what's on the data stream.
107     */
108    private List<DataStreamObserver> observers = null; 
109    
110    
/**
 * Creates a data reader that parses the contents of the specified stream.
 * The reader starts with the default separator, no labels and no observers.
 *
 * @param is The stream from which to read; must not be {@code null}.
 * @throws NullPointerException If {@code is} is {@code null}.
 */
public DataStreamReader(InputStream is) {

        if (is == null) {
                throw new NullPointerException("Cannot read from a null stream.");
        }

        // Parser state first, then the reader itself:
        this.observers = new ArrayList<DataStreamObserver>();
        this.nextDatasetFileIndex = -1;
        this.labelsSet = false;
        this.separatorSet = false;
        this.separator = DefaultSeparator;
        this.in = new BufferedReader(new InputStreamReader(is));
}
128    
/**
 * Creates a data reader on the specified stream and registers one initial observer.
 * A {@code null} observer is silently ignored, exactly as by
 * {@link #addObserver(DataStreamObserver)}.
 *
 * @param is The stream from which to read; must not be {@code null}.
 * @param observer An observer for the data stream contents.
 */
public DataStreamReader(InputStream is, DataStreamObserver observer) {
        this(is);
        this.addObserver(observer);
}
139    
140    
141    /**
142     * Tells whether this reader's underlying data stream is ready to be read.
143     * 
144     * @return {@code true} if the next {@code readFromStream()} is guaranteed not to block for input,
145     * {@code false} otherwise. Note that returning {@code false} does not guarantee that the next read
146     * will block.
147     * @throws IOException If an I/O error occurs.
148     */
149    public boolean ready() throws IOException {
150            return in.ready();
151    }
152    
153    /**
154     * Closes the underlying data stream. Further reading is not possible after calling this method.
155     * @throws IOException If an I/O error occurs.
156     */
157    public void close() throws IOException {
158            in.close();
159    }
160    
161    /**
162     * Reads as many data lines from the underlying stream as there are available and parses them.
163     *  
164     * @return The number on non-empty data lines read.
165     * @throws IOException If an I/O error occurs.
166     * @throws DataFormatException If the data stream contents do not conform with the expected data
167     * stream format.
168     * @see org.LiveGraph.dataFile.write.DataStreamWriter
169     * @see #readFromStream(int)
170     */
171    public int readFromStream() throws IOException, DataFormatException {   
172            return readFromStream(-1);      
173    }
174    
175    /**
176     * Reads up to a specified number of data lines from the underlying stream, and parses the lines.
177     * Reading is stopped when the specified number of lines in reached or if no more lines are available.
178     * 
179     * @param maxLines The maximum number of data lines to read (empty lines are ignored and not counted,
180     * but all other lines including comment lines are counted). If negative, all available lines will
181     * be read.
182     * @return The number on non-empty data lines read.
183     * @throws IOException If an I/O error occurs.
184     * @throws DataFormatException If the data stream contents do not conform with the expected data
185     * stream format.
186     * @see org.LiveGraph.dataFile.write.DataStreamWriter
187     */
188    public int readFromStream(int maxLines) throws IOException, DataFormatException {
189            
190            int linesRead = 0;
191            String line = null;
192            while (ready() && (0 > maxLines || linesRead < maxLines) ) {
193                    line = in.readLine();
194                    line = line.trim();
195                    if (line.length() > 0) {
196                            processLine(line);
197                            linesRead++;
198                    }
199            }
200            return linesRead;
201    }
202    
203    /**
204     * Notifies observers regestered with this parser of a "data values separator set"-event.
205     * 
206     * @param separator New data separator to be passed to the observers.
207     */
208    protected void notifySeparatorSet(String separator) {
209            for (DataStreamObserver observer : observers)
210                    observer.eventSeparatorSet(separator, this);
211    }
212    
213    /**
214     * Notifies observers regestered with this parser of a "comment line parsed"-event.
215     * 
216     * @param comment The parsed comment line to be passed to the observers.
217     */
218    protected void notifyCommentLine(String comment) {
219            for (DataStreamObserver observer : observers)
220                    observer.eventCommentLine(comment, this);
221    }
222    
223    /**
224     * Notifies observers regestered with this parser of a "file info line parsed"-event.
225     * 
226     * @param info The parsed file info to be passed to the observers.
227     */
228    protected void notifyFileInfoLine(String info) {
229            for (DataStreamObserver observer : observers)
230                    observer.eventFileInfoLine(info, this);
231    }
232    
233    /**
234     * Notifies observers regestered with this parser of a "data series labels parsed"-event.
235     * 
236     * @param labels The parsed data series labels to be passed to the observers.
237     */
238    protected void notifyLabelsSet(List<String> labels) {
239            for (DataStreamObserver observer : observers)
240                    observer.eventLabelsSet(labels, this);
241    }
242    
243    /**
244     * Notifies observers regestered with this parser of a "dataset parsed"-event.
245     * 
246     * @param dataTokens The parsed data tokens to be passed to the observers.
247     * @param datasetIndex The file index of the parsed dataset to be passed to the observers.
248     */
249    protected void notifyDataLineRead(List<String> dataTokens, int datasetIndex) {
250            for (DataStreamObserver observer : observers)
251                    observer.eventDataLineRead(dataTokens, datasetIndex, this);
252    }
253    
254    /**
255     * Adds an observer to this parser.
256     * 
257     * @param observer The observer to add.
258     * @return {@code if the specified observer cound not be added because it was already registered},
259     * {@code true otherwise}.
260     */
261    public boolean addObserver(DataStreamObserver observer) {
262            if (null == observer || hasObserver(observer))
263                    return false;
264            return observers.add(observer);
265    }
266    
267    /**
268     * Checks whether the specified observer is registered with this parser.
269     *  
270     * @param observer An observer.
271     * @return {@code true} if the specified {@code observer} is not {@code null} and is regestered
272     * with this parser, {@code false} otherwise.
273     */
274    public boolean hasObserver(DataStreamObserver observer) {
275            if (null == observer)
276                    return false;
277            return observers.contains(observer);    
278    }
279    
280    /**
281     * De-registeres the specified observer from this parser.
282     * 
283     * @param observer An observer.
284     * @return {@code true} if the specified observer is not {@code null} and was on the 
285     * list of registered observers and is now removed from this list, {@code false} otherwise. 
286     */
287    public boolean removeObserver(DataStreamObserver observer) {
288            if (null == observer)
289                    return false;
290            return observers.remove(observer);
291    }
292    
293    /**
294     * Counts this parser's observers.
295     * 
296     * @return The number of observers registered with this parser.
297     */
298    public int countObservers() {
299            return observers.size();
300    }
301    
302    /**
303     * This static utility method converts a list of {@code String} tokens (presumably just parsed
304     * from a data line) to a list of {@code Double} objects containing the tokens' values; tokens
305     * that cannot be parsed to a {@code Double} are represented by {@code null}-objects in the
306     * resulting list.
307     * 
308     * @param tokens A list of data tokens.
309     * @return A list of the double values of the specified tokens.
310     */
311    public static List<Double> convertTokensToDoubles(List<String> tokens) {
312            
313            if (null == tokens)
314                    return Collections.emptyList();
315            
316            List<Double> doubles = new ArrayList<Double>(tokens.size());
317            for (String tok : tokens) {
318                    
319                    if (null == tok)
320                            continue;
321                    
322                    tok = tok.trim();
323                    
324                    Double val = null;
325                    if (null != tok && 0 < tok.length()) {
326                            try { val = Double.valueOf(tok); }
327                            catch (NumberFormatException e) { val = null; }
328                    }
329                    
330                    doubles.add(val);               
331            }
332            return doubles;
333    }
334    
335    
336    /**
337     * This static utility method converts a list of strings (presumably representing a list of
338     * labels just parsed from the data file) to a list of strings where each string is unique
339     * in respect to its {@code equals} method (case sensitive); this happens by attaching 
340     * counters to repreated strings: for instance, {@code ["boo", "foo", "boo"]} it converted to
341     * {@code ["boo (1)", "foo", "boo (2)"]}.  
342     *  
343     * @param rawLabels The list of labels to convert.
344     * @param allowEmptyLabels If this is {@code false}, all empty strings ({@code ""}) are converted
345     * to underscores ({@code "_"}) before possibly applying the counters.
346     * @return A list of unique data series labels based on the specified list.
347     */
348    public static List<String> createUniqueLabels(List<String> rawLabels, boolean allowEmptyLabels) {
349            
350            List<String> uniqueLabels = new ArrayList<String>();
351            Map<String, Integer> labelCounts = new HashMap<String, Integer>();
352                    
353            // Mark labels which occure more than once:
354            for (String rawLabel : rawLabels) {
355                    
356                    rawLabel = rawLabel.trim();
357                    if (!allowEmptyLabels && rawLabel.length() == 0)
358                            rawLabel = "_";
359                    
360                    if (!labelCounts.containsKey(rawLabel)) {
361                            
362                            labelCounts.put(rawLabel, 1);
363                            
364                    } else {
365            
366                            int c = labelCounts.get(rawLabel);                      
367                            labelCounts.put(rawLabel, ++c);
368                            rawLabel = rawLabel + " (" + c + ")";
369                    }
370                                                    
371                    uniqueLabels.add(rawLabel);
372            }
373            
374            // Change first occurence of "label" into "label (1)" for the labels which appear more than once:
375            for (String label : labelCounts.keySet()) {
376                    int c = labelCounts.get(label); 
377                    if (1 < c) {
378                            int p = uniqueLabels.indexOf(label);
379                            uniqueLabels.set(p, label + " (1)");
380                    }
381            }
382            
383            // Done:        
384            return uniqueLabels;
385    }
386    
387    
388    /**
389     * Examines a data line and dispatches to a specialised parsing routine.
390     * 
391     * @param line A data line.
392     * @throws DataFormatException If the data stream contents do not conform with the expected data
393     * stream format.
394     */
395    private void processLine(String line) throws DataFormatException {
396    
397            if (!separatorSet && line.startsWith(TAGSepDefinition) && line.endsWith(TAGSepDefinition)) {
398                    processSeparatorDefinitionLine(line);
399                    return;
400            }
401            
402            if (line.startsWith(TAGComment)) {
403                    processCommentLine(line);
404                    return;
405            }
406            
407            if (line.startsWith(TAGFileInfo)) {
408                    processFileInfoLine(line);
409                    return;
410            }
411            
412            if (!labelsSet) {
413                    processSeriesLabelsLine(line);
414                    return;
415            }
416            
417            if (true) {
418                    processDataLine(line);
419                    return;
420            }
421            
422            throw new Error("The program should never get to this line!");          
423    }
424    
425    /**
426     * Parses a data values separator definition line.
427     * 
428     * @param line Data line to parse.
429     * @throws DataFormatException If the data line contents are not in the expected format.
430     */
431    private void processSeparatorDefinitionLine(String line) throws DataFormatException {
432            
433            if (line.length() < TAGSepDefinition.length() * 2)
434                    throw new DataFormatException("Illegal separator definition: \"" + line + "\"");
435            
436            if (line.length() == TAGSepDefinition.length() * 2)
437                    throw new DataFormatException("Illegal separator definition: separator may not be an empty string");
438            
439            String sep = line.substring(TAGSepDefinition.length(), line.length() - TAGSepDefinition.length());
440            
441            String problem = isValidSeparator(sep);
442            if (null != problem)
443                    throw new DataFormatException("Illegal separator definition: " + problem);
444            
445            separator = sep;
446            separatorSet = true;
447            notifySeparatorSet(separator);
448    }
449    
450    /**
451     * Parses a comments line.
452     * 
453     * @param line Data line to parse.
454     * @throws DataFormatException If the data line contents are not in the expected format.
455     */
456    private void processCommentLine(String line) throws DataFormatException {
457            String comment = "";
458            if (line.length() > TAGComment.length())
459                    comment = line.substring(TAGComment.length()).trim();
460            
461            separatorSet = true;
462            notifyCommentLine(comment);
463    }
464    
465    /**
466     * Parses a file information line.
467     * 
468     * @param line Data line to parse.
469     * @throws DataFormatException If the data line contents are not in the expected format.
470     */
471    private void processFileInfoLine(String line) throws DataFormatException {
472            String info = "";
473            if (line.length() > TAGFileInfo.length())
474                    info = line.substring(TAGFileInfo.length()).trim();
475            
476            separatorSet = true;
477            notifyFileInfoLine(info);
478    }
479    
480    /**
481     * Parses a data series headings line.
482     * 
483     * @param line Data line to parse.
484     * @throws DataFormatException If the data line contents are not in the expected format.
485     */
486    private void processSeriesLabelsLine(String line) throws DataFormatException {
487            
488            DataLineTokenizer tok = new DataLineTokenizer(line, separator);         
489            nextDatasetFileIndex = 0;
490            labelsSet = true;
491            separatorSet = true;
492            notifyLabelsSet(Collections.unmodifiableList(tok.getTokens()));
493    }
494    
495    /**
496     * Parses a data line.
497     * 
498     * @param line Data line to parse.
499     * @throws DataFormatException If the data line contents are not in the expected format.
500     */
501    private void processDataLine(String line) throws DataFormatException {
502            
503            DataLineTokenizer tok = new DataLineTokenizer(line, separator);
504            separatorSet = true;
505            notifyDataLineRead(tok.getTokens(), nextDatasetFileIndex++);
506    }
507    
508    }