ANTLR Support Libraries 2.7.1+
CharScanner.hpp
Go to the documentation of this file.
1#ifndef INC_CharScanner_hpp__
2#define INC_CharScanner_hpp__
3
4/* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 *
8 * $Id: //depot/code/org.antlr/release/antlr-2.7.7/lib/cpp/antlr/CharScanner.hpp#2 $
9 */
10
11#include <antlr/config.hpp>
12
13#include <map>
14#include <strings.h>
15#include <cstdio>
16
17#ifdef HAS_NOT_CCTYPE_H
18#include <ctype.h>
19#else
20#include <cctype>
21#endif
22
23#if ( _MSC_VER == 1200 )
24// VC6 seems to need this
25// note that this is not a standard C++ include file.
26# include <stdio.h>
27#endif
28
29#include <antlr/TokenStream.hpp>
33#include <antlr/InputBuffer.hpp>
34#include <antlr/BitSet.hpp>
36
37#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
38namespace antlr {
39#endif
40
42
43ANTLR_C_USING(tolower)
44
45#ifdef ANTLR_REALLY_NO_STRCASECMP
46// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
47// on the mac has neither...
/** Case-insensitive comparison of two NUL-terminated strings.
 *
 * Fallback for platforms that offer neither strcasecmp nor stricmp.
 * Each character is folded with tolower() and the folded values are
 * compared as unsigned char values.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if equal
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	while (true)
	{
		// Go through unsigned char: passing a negative plain char to
		// tolower() is undefined, and comparing plain chars would order
		// bytes >= 0x80 before ASCII where char is signed.
		const unsigned char u1 = static_cast<unsigned char>(*s1++);
		const unsigned char u2 = static_cast<unsigned char>(*s2++);
		const int c1 = tolower(u1);
		const int c2 = tolower(u2);

		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		if (c1 == 0) return 0;	// both strings ended at the same place
	}
}
59#else
60#ifdef NO_STRCASECMP
61ANTLR_C_USING(stricmp)
62#else
63ANTLR_C_USING(strcasecmp)
64#endif
65#endif
66
69class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
70private:
72public:
73#ifdef NO_TEMPLATE_PARTS
74 CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
75#endif
77 : scanner(theScanner)
78 {
79 }
80 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
81// defaults are good enough..
82 // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
83 // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
84};
85
89protected:
90 typedef RefToken (*factory_type)();
91public:
92 CharScanner(InputBuffer& cb, bool case_sensitive );
93 CharScanner(InputBuffer* cb, bool case_sensitive );
94 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
95
96 virtual ~CharScanner()
97 {
98 }
99
100 virtual int LA(unsigned int i);
101
102 virtual void append(char c)
103 {
104 if (saveConsumedInput)
105 {
106 size_t l = text.length();
107
108 if ((l%256) == 0)
109 text.reserve(l+256);
110
111 text.replace(l,0,&c,1);
112 }
113 }
114
115 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
116 {
117 if( saveConsumedInput )
118 text += s;
119 }
120
121 virtual void commit()
122 {
123 inputState->getInput().commit();
124 }
125
129 virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
130 {
131 consume();
132 consumeUntil(tokenSet);
133 }
134
135 virtual void consume()
136 {
137 if (inputState->guessing == 0)
138 {
139 int c = LA(1);
140 if (caseSensitive)
141 {
142 append(c);
143 }
144 else
145 {
146 // use input.LA(), not LA(), to get original case
147 // CharScanner.LA() would toLower it.
148 append(inputState->getInput().LA(1));
149 }
150
151 // RK: in a sense I don't like this automatic handling.
152 if (c == '\t')
153 tab();
154 else
155 inputState->column++;
156 }
157 inputState->getInput().consume();
158 }
159
161 virtual void consumeUntil(int c)
162 {
163 for(;;)
164 {
165 int la_1 = LA(1);
166 if( la_1 == EOF_CHAR || la_1 == c )
167 break;
168 consume();
169 }
170 }
171
173 virtual void consumeUntil(const BitSet& set)
174 {
175 for(;;)
176 {
177 int la_1 = LA(1);
178 if( la_1 == EOF_CHAR || set.member(la_1) )
179 break;
180 consume();
181 }
182 }
183
185 virtual unsigned int mark()
186 {
187 return inputState->getInput().mark();
188 }
190 virtual void rewind(unsigned int pos)
191 {
192 inputState->getInput().rewind(pos);
193 }
194
196 virtual void match(int c)
197 {
198 int la_1 = LA(1);
199 if ( la_1 != c )
200 throw MismatchedCharException(la_1, c, false, this);
201 consume();
202 }
203
207 virtual void match(const BitSet& b)
208 {
209 int la_1 = LA(1);
210
211 if ( !b.member(la_1) )
212 throw MismatchedCharException( la_1, b, false, this );
213 consume();
214 }
215
219 virtual void match( const char* s )
220 {
221 while( *s != '\0' )
222 {
223 // the & 0xFF is here to prevent sign extension lateron
224 int la_1 = LA(1), c = (*s++ & 0xFF);
225
226 if ( la_1 != c )
227 throw MismatchedCharException(la_1, c, false, this);
228
229 consume();
230 }
231 }
235 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
236 {
237 size_t len = s.length();
238
239 for (size_t i = 0; i < len; i++)
240 {
241 // the & 0xFF is here to prevent sign extension lateron
242 int la_1 = LA(1), c = (s[i] & 0xFF);
243
244 if ( la_1 != c )
245 throw MismatchedCharException(la_1, c, false, this);
246
247 consume();
248 }
249 }
253 virtual void matchNot(int c)
254 {
255 int la_1 = LA(1);
256
257 if ( la_1 == c )
258 throw MismatchedCharException(la_1, c, true, this);
259
260 consume();
261 }
265 virtual void matchRange(int c1, int c2)
266 {
267 int la_1 = LA(1);
268
269 if ( la_1 < c1 || la_1 > c2 )
270 throw MismatchedCharException(la_1, c1, c2, false, this);
271
272 consume();
273 }
274
275 virtual bool getCaseSensitive() const
276 {
277 return caseSensitive;
278 }
279
280 virtual void setCaseSensitive(bool t)
281 {
282 caseSensitive = t;
283 }
284
285 virtual bool getCaseSensitiveLiterals() const=0;
286
288 virtual int getLine() const
289 {
290 return inputState->line;
291 }
292
294 virtual void setLine(int l)
295 {
296 inputState->line = l;
297 }
298
300 virtual int getColumn() const
301 {
302 return inputState->column;
303 }
305 virtual void setColumn(int c)
306 {
307 inputState->column = c;
308 }
309
311 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
312 {
313 return inputState->filename;
314 }
316 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
317 {
318 inputState->filename = f;
319 }
320
321 virtual bool getCommitToPath() const
322 {
323 return commitToPath;
324 }
325
326 virtual void setCommitToPath(bool commit)
327 {
328 commitToPath = commit;
329 }
330
332 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
333 {
334 return text;
335 }
336
337 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
338 {
339 text = s;
340 }
341
342 virtual void resetText()
343 {
344 text = "";
345 inputState->tokenStartColumn = inputState->column;
346 inputState->tokenStartLine = inputState->line;
347 }
348
349 virtual RefToken getTokenObject() const
350 {
351 return _returnToken;
352 }
353
357 virtual void newline()
358 {
359 ++inputState->line;
360 inputState->column = 1;
361 }
362
367 virtual void tab()
368 {
369 int c = getColumn();
370 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
371 setColumn( nc );
372 }
374 int setTabsize( int size )
375 {
376 int oldsize = tabsize;
377 tabsize = size;
378 return oldsize;
379 }
381 int getTabSize() const
382 {
383 return tabsize;
384 }
385
387 virtual void reportError(const RecognitionException& e);
388
390 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
391
393 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
394
396 {
397 return inputState->getInput();
398 }
399
401 {
402 return inputState;
403 }
404
408 {
409 inputState = state;
410 }
411
413 virtual void setTokenObjectFactory(factory_type factory)
414 {
415 tokenFactory = factory;
416 }
417
421 virtual int testLiteralsTable(int ttype) const
422 {
423 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
424 if (i != literals.end())
425 ttype = (*i).second;
426 return ttype;
427 }
428
434 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
435 {
436 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
437 if (i != literals.end())
438 ttype = (*i).second;
439 return ttype;
440 }
441
443 virtual int toLower(int c) const
444 {
445 // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
446 // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
447 // this one is more structural. Maybe make this configurable.
448 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
449 }
450
466 virtual void uponEOF()
467 {
468 }
469
471 virtual void traceIndent();
472 virtual void traceIn(const char* rname);
473 virtual void traceOut(const char* rname);
474
475#ifndef NO_STATIC_CONSTS
476 static const int EOF_CHAR = EOF;
477#else
478 enum {
479 EOF_CHAR = EOF
480 };
481#endif
482protected:
486 factory_type tokenFactory;
489
491
494
500
502
504 virtual RefToken makeToken(int t)
505 {
506 RefToken tok = tokenFactory();
507 tok->setType(t);
508 tok->setColumn(inputState->tokenStartColumn);
509 tok->setLine(inputState->tokenStartLine);
510 return tok;
511 }
512
515 class Tracer {
516 private:
518 const char* text;
519
520 Tracer(const Tracer& other); // undefined
521 Tracer& operator=(const Tracer& other); // undefined
522 public:
523 Tracer( CharScanner* p,const char* t )
524 : parser(p), text(t)
525 {
526 parser->traceIn(text);
527 }
529 {
530 parser->traceOut(text);
531 }
532 };
533
535private:
536 CharScanner( const CharScanner& other ); // undefined
537 CharScanner& operator=( const CharScanner& other ); // undefined
538
539#ifndef NO_STATIC_CONSTS
540 static const int NO_CHAR = 0;
541#else
542 enum {
543 NO_CHAR = 0
544 };
545#endif
546};
547
548inline int CharScanner::LA(unsigned int i)
549{
550 int c = inputState->getInput().LA(i);
551
552 if ( caseSensitive )
553 return c;
554 else
555 return toLower(c); // VC 6 tolower bug caught in toLower.
556}
557
558inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
559{
560 if (scanner->getCaseSensitiveLiterals())
561 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
562 else
563 {
564#ifdef NO_STRCASECMP
565 return (stricmp(x.c_str(),y.c_str())<0);
566#else
567 return (strcasecmp(x.c_str(),y.c_str())<0);
568#endif
569 }
570}
571
572#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
573}
574#endif
575
576#endif //INC_CharScanner_hpp__
TokenRefCount< Token > RefToken
Definition: TokenRefCount.hpp:92
Definition: BitSet.hpp:40
bool member(unsigned int el) const
Definition: BitSet.cpp:40
Definition: CharScanner.hpp:69
CharScannerLiteralsLess(const CharScanner *theScanner)
Definition: CharScanner.hpp:76
bool operator()(const std ::string &x, const std ::string &y) const
Definition: CharScanner.hpp:558
const CharScanner * scanner
Definition: CharScanner.hpp:71
Definition: CharScanner.hpp:515
Tracer(CharScanner *p, const char *t)
Definition: CharScanner.hpp:523
~Tracer()
Definition: CharScanner.hpp:528
const char * text
Definition: CharScanner.hpp:518
Tracer & operator=(const Tracer &other)
Tracer(const Tracer &other)
CharScanner * parser
Definition: CharScanner.hpp:517
Definition: CharScanner.hpp:88
virtual int getColumn() const
Get the column the scanner currently is in (starts at 1)
Definition: CharScanner.hpp:300
int setTabsize(int size)
set the tabsize. Returns the old tabsize
Definition: CharScanner.hpp:374
virtual void consume()
Definition: CharScanner.hpp:135
virtual void match(const std ::string &s)
Definition: CharScanner.hpp:235
virtual void consumeUntil(int c)
Definition: CharScanner.hpp:161
virtual void append(char c)
Definition: CharScanner.hpp:102
virtual bool getCaseSensitiveLiterals() const =0
virtual int testLiteralsTable(const std ::string &txt, int ttype) const
Definition: CharScanner.hpp:434
virtual void newline()
Definition: CharScanner.hpp:357
virtual bool getCaseSensitive() const
Definition: CharScanner.hpp:275
CharScanner & operator=(const CharScanner &other)
virtual void setLine(int l)
set the line number
Definition: CharScanner.hpp:294
virtual void setColumn(int c)
set the column number
Definition: CharScanner.hpp:305
virtual LexerSharedInputState getInputState()
Definition: CharScanner.hpp:400
virtual void setFilename(const std ::string &f)
Set the filename the scanner is using (used in error messages)
Definition: CharScanner.hpp:316
virtual void match(int c)
See if input contains character 'c' throw MismatchedCharException if not.
Definition: CharScanner.hpp:196
virtual void tab()
Definition: CharScanner.hpp:367
virtual void setInputState(LexerSharedInputState state)
Definition: CharScanner.hpp:407
virtual InputBuffer & getInputBuffer()
Definition: CharScanner.hpp:395
virtual void recover(const RecognitionException &ex, const BitSet &tokenSet)
Definition: CharScanner.hpp:129
virtual ~CharScanner()
Definition: CharScanner.hpp:96
virtual int testLiteralsTable(int ttype) const
Definition: CharScanner.hpp:421
int tabsize
tab size the scanner uses.
Definition: CharScanner.hpp:501
virtual void setCommitToPath(bool commit)
Definition: CharScanner.hpp:326
virtual RefToken makeToken(int t)
Create a new RefToken of type t.
Definition: CharScanner.hpp:504
virtual int toLower(int c) const
Override this method to get more specific case handling.
Definition: CharScanner.hpp:443
virtual void resetText()
Definition: CharScanner.hpp:342
std ::map< std ::string, int, CharScannerLiteralsLess > literals
Definition: CharScanner.hpp:488
virtual void traceIn(const char *rname)
Definition: CharScanner.cpp:84
factory_type tokenFactory
Factory for tokens.
Definition: CharScanner.hpp:486
virtual void setTokenObjectFactory(factory_type factory)
Set the factory for created tokens.
Definition: CharScanner.hpp:413
int traceDepth
Definition: CharScanner.hpp:534
virtual bool getCommitToPath() const
Definition: CharScanner.hpp:321
virtual void commit()
Definition: CharScanner.hpp:121
virtual void match(const BitSet &b)
Definition: CharScanner.hpp:207
int getTabSize() const
Return the tabsize used by the scanner.
Definition: CharScanner.hpp:381
virtual const std::string & getText() const
Definition: CharScanner.hpp:332
virtual void setText(const std ::string &s)
Definition: CharScanner.hpp:337
virtual unsigned int mark()
Mark the current position and return an id for it.
Definition: CharScanner.hpp:185
virtual void matchRange(int c1, int c2)
Definition: CharScanner.hpp:265
virtual int LA(unsigned int i)
Definition: CharScanner.hpp:548
virtual void append(const std ::string &s)
Definition: CharScanner.hpp:115
virtual void match(const char *s)
Definition: CharScanner.hpp:219
virtual void consumeUntil(const BitSet &set)
Definition: CharScanner.hpp:173
LexerSharedInputState inputState
Input state, gives access to input stream, shared among different lexers.
Definition: CharScanner.hpp:493
virtual void rewind(unsigned int pos)
Rewind the scanner to a previously marked position.
Definition: CharScanner.hpp:190
virtual RefToken getTokenObject() const
Definition: CharScanner.hpp:349
std::string text
Definition: CharScanner.hpp:483
bool commitToPath
Definition: CharScanner.hpp:499
bool caseSensitive
Is this lexer case sensitive.
Definition: CharScanner.hpp:487
virtual void uponEOF()
Definition: CharScanner.hpp:466
virtual void traceOut(const char *rname)
Definition: CharScanner.cpp:92
virtual void setCaseSensitive(bool t)
Definition: CharScanner.hpp:280
virtual const std::string & getFilename() const
get the filename for the file currently used
Definition: CharScanner.hpp:311
virtual int getLine() const
Get the line the scanner currently is in (starts at 1)
Definition: CharScanner.hpp:288
bool saveConsumedInput
flag indicating whether consume saves characters
Definition: CharScanner.hpp:485
virtual void matchNot(int c)
Definition: CharScanner.hpp:253
CharScanner(const CharScanner &other)
RefToken _returnToken
used to return tokens w/o using return val
Definition: CharScanner.hpp:490
Definition: InputBuffer.hpp:31
int tokenStartLine
Definition: LexerSharedInputState.hpp:128
int tokenStartColumn
Definition: LexerSharedInputState.hpp:127
Definition: MismatchedCharException.hpp:21
Definition: RecognitionException.hpp:19
Definition: TokenStream.hpp:22
virtual void setColumn(int c)
Definition: Token.cpp:35
virtual void setLine(int l)
Definition: Token.cpp:39
virtual void setType(int t)
Definition: Token.cpp:47
#define ANTLR_USE_NAMESPACE(_x_)
Definition: config.hpp:18
#define ANTLR_API
Definition: config.hpp:22
#define ANTLR_C_USING(_x_)
Definition: config.hpp:21
Definition: ANTLRException.hpp:15