|
|
Parsing
g1=
digraph G {
graph [rankdir = LR ];
something we don't care about;
graph [lp = "229,
330"];
} |
parse.out=
--| chars |------
100
105
103
114
97
112
104
32
71
32
123
10
32
32
32
32
32
103
114
etc.,
--| tokens |------
graph
$sopen
rankdir
$equal
LR
$sclose
$end
something
we
don't
care
about
$end
graph
$sopen
lp
$equal
$quote
229
$comma
330
$quote
$sclose
$end
--| statements |------
rankdir=LR
lp=[229, 330]
|
parse.pl=
% parse.pl
% finding statements of the form X=Y
% within a complex string
:- [format].
% demo: run the whole pipeline with output redirected to 'parse.out'.
% A failing demo1 is tolerated so that told/0 is always reached.
demo :-
    tell('parse.out'),
    (   demo1 -> true ; true ),
    told.

% demo1: read the file g1, tokenise it, parse it, then report the
% result of each stage.
demo1 :-
    chars(g1, Codes),            % file I/O is kept separate ...
    tokens(Toks, Codes, []),     % ... from tokenisation ...
    parse(Stmts, Toks, []),      % ... and from parsing
    !,                           % commit here: no backtracking into the
                                 % DCGs once an answer has been found
    format('\n--| chars |--\n ~20L\tetc.,\n\n',[Codes]),
    format('\n--| tokens |--\n ~L', [Toks]),
    format('\n--| statements |--\n ~L', [Stmts]).
% note the strange calling convention of DCGs:
% the input string is the second-last argument,
% the unconsumed remainder is the last argument,
% and any outputs are the other (leading) arguments
% file i/o
% chars(+File, -Codes): Codes is the list of character codes read
% from File.  The file is opened with see/1 and closed with seen/0.
chars(File, Codes) :-
    see(File),
    chars1(Codes),
    !,
    seen.
% Catch-all used when the clause above fails: close the current input
% and answer with the empty list.  Never leave files open!
chars(_, []) :-
    seen.
% chars1(-Codes): read character codes from the current input until
% the reader yields -1.
chars1(Codes) :-
    get0(Code),                  % <-- primitive character reader
    chars2(Code, Codes).

% chars2(+Code, -Codes): Code has just been read.  -1 ends the list;
% any other code is kept and reading continues.
chars2(-1, []) :- !.
chars2(Code, [Code|Codes]) :-
    chars1(Codes).
/* understanding text strings is really three problems:
1) tokenization -
2) parsing
3) interpretation
Tokenization:
The individual atomic expressions of a language
are (usually) written down as sequences of simpler
characters. The text is ultimately then a
character stream. The task of a tokenizer is at
least to segment this character sequence turning
it into a token stream. The tokens (the fancy word
in this context for 'word' or 'atomic expression')
are often also assigned a category. We will support
two categories: $X will denote special words and X
(without a "$") will denote everything else.
*/
% note that we can "eat" more than one character in a leaf dcg
% special(?Name): consume the text of one reserved symbol and report
% its Name.  token1//1 wraps the Name as $Name, and blacks1/3 ends a
% word as soon as the remaining input starts with any of these.
% NOTE(review): the double-quoted literals rely on the DCG translation
% treating them as character-code lists - confirm for the Prolog in use.
special(digraph) --> "digraph".
special(equal) --> "=".
special(comma) --> ",".
special(end) --> ";".
special(quote) --> [34]. % 34= " (written as a code; the double-quote character itself)
% curly brackets
special(copen) --> "{".
special(cclose) --> "}".
% round brackets
special(ropen) --> "(".
special(rclose) --> ")".
% square brackets
special(sopen) --> "[".
special(sclose) --> "]".
/*
Once tokenized, the input stream is smaller and more abstract,
which makes writing and debugging a parser easier.
*/
%-------------------------------------
% parser- assuming file written
% parse(-Statements): entry point of the parser; works on the token
% list produced by the tokenizer.
parse(Statements) --> statements(Statements).

% statements(-Collected): zero or more statements, each closed by the
% $end token.  Only some statements contribute to Collected.
% Clause order matters: stop, then try a "graph [...]" statement,
% then fall back to skipping a dull statement.
statements([]) --> [].
statements([Assignment|More]) -->
    % statements beginning with the keyword "graph" are collected
    [graph, $sopen],
    assignment(Assignment),
    [$sclose, $end],
    statements(More).
statements(Collected) -->
    % all other statements are dull and are ignored
    dull,
    [$end],
    statements(Collected).
% assignment(-Pair): Pair is Name=Value, read as a name token, the
% $equal token, and then the value(s).
assignment(Name=Value) -->
    [Name, $equal],
    values(Value).

% values(-V): either a quoted list of items (tried first) ...
% ... or a single token.
values(V) -->
    (   [$quote], items(V), [$quote]
    ;   [V]
    ).

% items(-Items): one or more tokens, connected to each other by
% $comma tokens.  The longer reading is tried before the shorter one.
items([Item|Items]) -->
    [Item],
    (   [$comma], items(Items)
    ;   { Items = [] }
    ).
% dull: skip zero or more tokens, trying the shortest skip first.
% It matches anything at all, so use it carefully.
dull -->
    (   []
    ;   [_], dull
    ).
%-------------------------------------
% here's a tokenizer for graphviz files.
% tokens(-Tokens): tokenise the body of a graphviz file.  The header
% and the footer are matched and then ignored.
tokens(Tokens) -->
    header,
    tokens1(Tokens),
    footer.

% header: the fixed opening "digraph G {", with optional whitespace
% before each part.
header -->
    whites, "digraph",           % the keyword is read and ignored
    whites, "G",
    whites, "{".

% footer: the closing brace, with optional whitespace on both sides.
footer -->
    whites, "}", whites.
% tokens1(-Tokens): the program body holds zero or more tokens.
tokens1([]) --> [].
tokens1([Token|Tokens]) -->
    token(Token),
    tokens1(Tokens).

% token(-Token): one token, after skipping any leading white space.
token(Token) -->
    whites,
    token1(Token).

% token1(-Token): two token types, tried in this order:
%   type 1: specials, returned with a leading "$";
%   type 2: everything else, i.e. a run of non-white, non-special
%           characters.
% Clause ordering is what controls the parse - hence, no backtracking
% please outside the call to a DCG parser.
token1($Name) --> special(Name).
token1(Word)  --> blacks(Word).
% whites(+Codes, -Rest): spin down the input to the first thing that
% is not white space; Rest starts there.
% note: this is a good place to write a "comment skipper"
whites([], []) :- !.
whites(Codes, Rest) :-
    (   Codes = [Code|More],
        white(Code)
    ->  whites(More, Rest)
    ;   Rest = Codes
    ).

% white(+Code): Code on its own is matched by space//0.
white(Code) :- space([Code], []).

% space: a blank, a tab or a newline.
space   --> " " ; tabb ; newline.
tabb    --> [9].
newline --> [10].
% blacks(-Token, +Codes, -Rest): read one ordinary word.
% Spins down the input to the first white-space or special thing,
% then turns the collected codes into Token with name/2 (which
% decides whether the text denotes a number or an atom).
% A word must hold at least one character: without that check this
% predicate could succeed with the empty atom while consuming
% nothing, and tokens1//1 would then recurse forever on input that
% cannot be tokenised (for example a missing closing brace).
blacks(Token, Codes, Rest) :-
    blacks1(Word, Codes, Rest),
    Word = [_|_],                % refuse the empty word
    name(Token, Word).           %<-- insert "num" here

% blacks1(-Word, +Codes, -Rest): Word is the leading run of Codes up
% to, but not including, the first white or special character.
blacks1([], [], []) :- !.        % end of input
blacks1([], Codes, Codes) :-
    % tricky stuff - if the NEXT thing is a special, stop here and
    % return the input unchanged
    special(_, Codes, _),
    !.
blacks1([Code|Word], [Code|Codes], Rest) :-
    \+ white(Code),
    !,
    blacks1(Word, Codes, Rest).
blacks1([], Codes, Codes).       % next thing is white space: stop
% phew- thank heavens that we don't need to do
% this everytime we write a DSL- for languages
% we can define with infix,prefix, postfix operators,
% prolog's "read" predicate does all this for us.
|
format.pl=
% format.pl
%--------------------------------------
% stuff to simplify printing clauses.
% swi-prolog lets a programmer customize
% the format statement.
% FIRST, a predicate of arity 2 is registered
% next to some letter
:- format_predicate('P',p(_,_)).
% SECOND, write the predicate.
% p(+Arg, +Term): print Term for the ~P directive.
%   Arg  - the optional numeric argument given with the directive,
%          or the atom `default` when none was given; it is used as
%          the starting index for numbervars/3.
%   Term - a fact written as (Head :- true), a rule (Head :- Body),
%          or any other term.
% The term is always handed to format/2 wrapped in a list: a bare
% list-valued term (including []) would otherwise be taken as the
% argument list itself.  With that in place lists no longer need a
% clause of their own.
p(default,X) :- !, p(0,X).
p(_,(X :- true)) :- !, format('~p.\n',[X]).
p(_,(X :- Y )) :- !, portray_clause((X :- Y)).
p(N,X) :-
    % double negation: name the variables for printing only, then
    % undo the bindings
    \+ \+ ( numbervars(X,N,_),
            format('~p',[X]) ).
%--------------------------------------
% stuff to simplify right justifying text
% FIRST:
:- format_predicate('>',padChars(_,_)).
% SECOND:
% padChars(+Width, +Term): write Term right-justified, i.e. preceded
% by enough blanks to fill Width columns.  No blanks are written when
% Term is already Width characters or longer.
% The first arg is the optional argument someone may have given with
% the "~" command; `default` stands for "none given" and means 5.
% The cut keeps backtracking out of the general clause, where
% `default - N` would raise a type error.
padChars(default,A) :-
    !,
    padChars(5,A).
padChars(S,A) :-
    writeThing(A,Thing,N),
    Pad is S - N,
    % standard trick to emulate
    % for(i=1;i<=N;i++) { doThis }
    forall(between(1,Pad,_),put(32)),
    write(Thing).

% writeThing(+X, -S, -L): S is X rendered with ~w and L is the
% length of S.
writeThing(X,S,L) :-
    % sformat returns the string in
    % the first arg
    sformat(S,'~w',[X]), string_length(S,L).
%--------------------------------------
% stuff to simplify left justifying text
% FIRST:
:- format_predicate('<',charsPad(_,_)).
% SECOND:
% charsPad(+Width, +Term): write Term left-justified, i.e. followed
% by enough blanks to fill Width columns.  No blanks are written when
% Term is already Width characters or longer.
% `default` stands for "no width given" and means 5; the cut keeps
% backtracking out of the general clause, where `default - N` would
% raise a type error.
charsPad(default,A) :- !, charsPad(5,A).
charsPad(S,A) :-
    % writeThing/3 already yields the printed length N.  The former
    % extra atom_length(A,N) check added nothing for atomic terms and
    % raised a type error for compound ones, so it is gone.
    writeThing(A,Thing,N),
    Pad is S - N,
    write(Thing),
    forall(between(1,Pad,_),put(32)).
%--------------------------------------
% stuff to simplify printing N twiddles,
% scaled to some factor
% FIRST:
:- format_predicate('S',twiddle(_,_)).
% SECOND:
% twiddle(+Scale, +N): write round(N/Scale) twiddles ("~").
% `default` stands for "no scale given" and means 25; the cut keeps
% backtracking out of the general clause, where `N/default` would
% raise a type error.
twiddle(default,A) :- !, twiddle(25,A).
twiddle(W,N) :-
    N1 is round(N/W),
    forall(between(1,N1,_),put(126)).   % 126 = "~"
%--------------------------------------
% stuff to simplify printing lists
% scaled to some factor
% FIRST:
:- format_predicate('L',printL(_,_)).
% SECOND:
% printL(+TooLong, +List): write the items of List one per line, each
% preceded by a tab, for every position strictly below TooLong
% (so "~20L" shows at most 19 items).
% `default` stands for "no limit given" and means 10^6; the cut keeps
% backtracking out of the general clause, where `Pos < default` would
% raise a type error.
printL(default,List) :- !, printL(10^6,List).
printL(TooLong,List) :-
    forall(( nth1(Pos,List,Item),
             Pos < TooLong ),
           % Item is wrapped in a list so that an item which is
           % itself a list is printed as one term rather than being
           % taken as format/2's argument list
           format('\t~w\n',[Item])).
|
Not © Tim Menzies, 2001
Share and enjoy- information wants to be free.
But if you take anything from this site, please credit tim@menzies.com.
|
|