Makefile 0100644 0000764 0001001 00000001174 07331021714 013001 0 ustar Administrator Kein OCAMLC=ocamlc -g
OCAMLOPT=ocamlopt
OPTCOMPOPTS=-inline 20 -unsafe
OPTLINKOPTS=
OCAMLYACC=ocamlyacc
OCAMLLEX=ocamllex
OCAMLDEP=ocamldep
OBJS=main.cmo
LIBS=unix.cma
all: tooimp.opt
tooimp: $(OBJS)
$(OCAMLC) -o $@ $(LIBS) $(OBJS)
clean::
rm -f tooimp tooimp.exe
tooimp.opt: $(OBJS:.cmo=.cmx)
$(OCAMLOPT) $(OPTLINKOPTS) -o $@ $(LIBS:.cma=.cmxa) $(OBJS:.cmo=.cmx)
clean::
rm -f tooimp.opt tooimp.opt.exe
.SUFFIXES: .ml .mli .cmo .cmi .cmx
.mli.cmi:
$(OCAMLC) -c $<
.ml.cmo:
$(OCAMLC) -c $<
.ml.cmx:
$(OCAMLOPT) -c $(OPTCOMPOPTS) $<
clean::
rm -f *.cm[iox] *.o
depend:
$(OCAMLDEP) *.mli *.ml > .depend
include .depend
README 0100644 0000764 0001001 00000000641 07331021722 012216 0 ustar Administrator Kein Team name: Too Imperative
Program name: tooimp
Single Member: Stephan Tolksdorf andorxor@gmx.de
Entry description:
Attempt of a linear optimizer written in OCaml.
Hadn't mucht time this weekend. So couldn't write an optimizer at all...
But I'm curious if my entry will pass the examples.
Building:
I've developed "tooimp" under Cygwin. Hope it builds under Linux.
Thank you for the interesting contest task!
buildme 0100755 0000764 0001001 00000000032 07331021704 012677 0 ustar Administrator Kein #!/bin/sh
make tooimp.opt
main.ml 0100644 0000764 0001001 00000033064 07331021560 012621 0 ustar Administrator Kein (* double linked list by Xavier Leroy *)
type 'a cdllist =
{ data: 'a; mutable prev: 'a cdllist; mutable next: 'a cdllist }
let create d =
let rec l = { data = d; prev = l; next = l } in l
let insert_before d list =
let newcons = { data = d; prev = list.prev; next = list } in
list.prev.next <- newcons;
list.prev <- newcons;
newcons
let insert_after list d =
let newcons = { data = d; prev = list; next = list.next } in
list.next.prev <- newcons;
list.next <- newcons;
newcons
let remove list =
list.prev.next <- list.next;
list.next.prev <- list.prev
(*exceptions*)
exception Timeout
exception InterpreterError
exception ImaLooser
(* Get file from channel *)
let get_file_as_string_list bsize ic =
let b = String.create bsize in (* buffer string *)
let gi = ref 0 in (* this counter represent the read in bytes*)
let rec get_buffers () =
let i = input ic b 0 bsize in
match i with
| 0 -> []
| _ -> gi:= !gi + i;
let hd = String.sub b 0 i in
hd :: (get_buffers ())
in
let sl = get_buffers () in
(!gi, sl)
(* core *)
type property_color = PC_r | PC_g | PC_b | PC_c | PC_m | PC_y | PC_k | PC_w |
PC_pre (* pre means predefined *)
type char_property = { mutable p_b : bool;
mutable p_em : bool;
mutable p_i : bool;
mutable p_s : bool;
mutable p_tt : bool;
mutable p_u : int; (* integer between 0 and 3 *)
mutable p_size : int; (* integer between 0 and 9; -1 means predefined *)
mutable p_color: property_color; }
let root_context = {p_b = false; p_em = false; p_i = false; p_s = false;
p_tt = false; p_u = 0; p_size = -1; p_color = PC_pre}
let pcopy p = {p_b = p.p_b; p_em = p.p_em; p_i = p.p_i; p_s = p.p_s;
p_tt = p.p_s; p_u = p.p_u; p_size = p.p_size; p_color = p.p_color}
let space_context p = let np = {p with p_s = false; p_em = false; p_i = false; p_b = false} in
if np.p_u = 0 then np.p_color <- PC_w;
np
type document_token = char_property * string
type smlng_tag = TAG_B | TAG_EM | TAG_I | TAG_S | TAG_TT | TAG_U |
TAG_SIZE of int | TAG_COLOR of property_color
type smlng_token = { mutable pretags: smlng_tag cdllist;
mutable token: document_token;
mutable posttags: smlng_tag cdllist }
let emptytags = create TAG_B (* should be understood as a constant... *)
let emptytoken () = {pretags = emptytags; token = (pcopy root_context, ""); posttags = emptytags}
let interpret_smlng sl = (* too imperative isn't it? hope it's at least fast enough *)
(* token *)
let t_length = ref 1024 in
let t = ref (String.create !t_length) in
let t_pos = ref 0 in
let rec add_char c =
if t_pos < t_length then
begin
String.unsafe_set !t !t_pos c; t_pos:= !t_pos + 1
end else
begin
let newlen = int_of_float ((float_of_int !t_length) *. 1.5) in
let t2 = String.create newlen in
String.unsafe_blit !t 0 t2 0 !t_length;
t:= t2; t_length:= newlen;
add_char c
end
in
let get_token () = String.sub !t 0 !t_pos in
(* temp property *)
let p = ref (pcopy root_context) in
(* flags *)
let in_tag = ref false in
let in_string = ref false in
let end_tag = ref false in
let set_newcontext = ref true in
let last_char = ref '\255' in
let last_context = ref (pcopy !p) in
let toks = create (emptytoken ()) in (* toks is an "achor" item without content *)
let stack = Stack.create () in
let rec rinterpret = function
| [] -> ()
| hd::tl
-> let str = hd in
let l = String.length str in
for i = 0 to l do
let c = String.unsafe_get str i in
match c with
| '<' -> if !in_string then
begin
ignore (insert_before
{pretags = emptytags; token = (pcopy !p, get_token ()); posttags = emptytags} toks);
in_string:= false;
t_pos:= 0;
end;
in_tag:= true
| '/' -> if !in_tag then end_tag:= true
else
begin
add_char c;
last_char:= ' ';
if !set_newcontext then begin last_context:= !p; set_newcontext:= false end;
in_string:= true;
end
| '>' -> let tag = get_token () in
if !end_tag then
begin
try
p:= Stack.pop stack;
with
| Stack.Empty -> prerr_endline ("Error: Unmatched tag (" ^ tag ^ ") in source!");
raise InterpreterError
end else
begin
Stack.push (pcopy !p) stack;
match tag with
| "B" -> !p.p_b <- true
| "EM" -> if not !p.p_s then (if !p.p_em then !p.p_em <- false else !p.p_em <- true)
| "I" -> !p.p_i <- true
| "PL" -> !p.p_s <- false; !p.p_em <- false; !p.p_i <- false;
!p.p_b <- false; !p.p_tt <- false; !p.p_u <- 0
| "S" -> !p.p_s <- true; !p.p_em <- false
| "TT" -> !p.p_tt <- true
| "U" -> if !p.p_u < 3 then !p.p_u <- !p.p_u + 1
| "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8"
| "9" -> !p.p_size <- int_of_string tag
| "r" -> !p.p_color <- PC_r
| "g" -> !p.p_color <- PC_g
| "b" -> !p.p_color <- PC_b
| "c" -> !p.p_color <- PC_c
| "m" -> !p.p_color <- PC_m
| "y" -> !p.p_color <- PC_y
| "k" -> !p.p_color <- PC_k
| "w" -> !p.p_color <- PC_w
| _ -> prerr_endline ("Error: Unsupported tag (" ^ tag ^ ") in source!");
raise InterpreterError
end;
in_tag:= false;
end_tag:= false;
t_pos:= 0;
set_newcontext:= true
| '\032' (* SPC *)
| '\013' (* CR *)
| '\010' (* LF *)
| '\009' (* TAB *)
-> if !last_char != ' ' ||
!p.p_tt || (space_context !last_context) <> (space_context !p) then
begin
add_char c;
last_char:= ' ';
if !set_newcontext then begin last_context:= !p; set_newcontext:= false end;
in_string:= true;
end
| _ -> add_char c;
if not !in_tag then
begin
last_char:= c;
if !set_newcontext then begin last_context:= !p; set_newcontext:= false end;
in_string:= true;
end
done;
rinterpret tl
in rinterpret sl; toks
let optimize_smlng toks = (* minor optimizations *)
let rec iter_rec l =
if l = toks then toks
else
let p, s = l.data.token in
let n, (pn, sn) = l.next, l.next.data.token in
if n != toks && p = pn then (* concatenate adjacent nodes with identical properties *)
let () = l.data.token <- (p, s ^ (sn)); remove n in
iter_rec l
else
if (String.length s = 1 && (s = "\032" || s = "\013" || s = "\010" || s = "\009")) then
let scp = space_context p in (* add single whitespace to a node with identical whitespace context *)
if n != toks && scp = (space_context pn) then
let () = n.data.token <- (pn, s ^ sn); remove l in
iter_rec n
else
let pr, (ppr, spr) = l.prev, l.prev.data.token in
if pr != toks && scp = (space_context ppr) then
let () = pr.data.token <- (ppr, spr ^ s); remove l in
iter_rec pr
else iter_rec l.next
else iter_rec l.next
in iter_rec toks.next
let reencode_smlng toks = (* fill pretags and posttags *)
let rec scan_for f ts =
if ts == toks then toks
else if f (fst ts.data.token) then ts else scan_for f ts.next
in
let distance f ts =
let rec rdist a ts =
if ts == toks then (a - 1)
else if f (fst ts.data.token) then a else rdist (a + 1) ts.next
in rdist 0 ts
in
let scan_nprecolor = scan_for (function p -> p.p_color <> PC_pre) in
let scan_color c = scan_for (function p -> p.p_color = c) in
let scan_b = scan_for (function p -> p.p_b) in
let scan_nb = scan_for (function p -> not p.p_b) in
let dist_nb = distance (function p -> not p.p_b) in
let p = ref (pcopy root_context) in
(* hadn't time to write the actual optimizing encoder...
but I think a linear approuch could be quite successfull *)
let rec simpleencode ts =
let enctag t =
if ts.data.pretags == emptytags then ts.data.pretags <- create t
else ignore (insert_before t ts.data.pretags);
if ts.data.posttags == emptytags then ts.data.posttags <- create t
else ignore (insert_after ts.data.posttags t);
in
if ts != toks then
begin
let np = fst ts.data.token in
if np.p_b then enctag TAG_B;
if np.p_em then enctag TAG_EM;
if np.p_i then enctag TAG_I;
if np.p_s then enctag TAG_S;
if np.p_tt then enctag TAG_TT;
if np.p_u > !p.p_u then enctag TAG_U;
if np.p_size <> -1 then enctag (TAG_SIZE np.p_size);
if np.p_color <> PC_pre then enctag (TAG_COLOR np.p_color);
p:= np;
if ts.data.posttags != ts.data.posttags.next then ts.data.posttags <- ts.data.posttags.next;
simpleencode ts.next;
end else ts
in
simpleencode toks.next
let output_toks toks maxlen oc =
let get_toks_as_string_list ts bsize =
let get_tags_as_string tags pre =
let string_of_tag = function
| TAG_B -> if pre then "" else ""
| TAG_EM -> if pre then "" else ""
| TAG_I -> if pre then "" else ""
| TAG_S -> if pre then "" else ""
| TAG_TT -> if pre then "" else ""
| TAG_U -> if pre then "" else ""
| TAG_SIZE s -> if pre then "<" ^ string_of_int s ^ ">" else "" ^ string_of_int s ^ ">"
| TAG_COLOR c when c = PC_r -> if pre then "" else ""
| TAG_COLOR c when c = PC_g -> if pre then "" else ""
| TAG_COLOR c when c = PC_b -> if pre then "" else ""
| TAG_COLOR c when c = PC_c -> if pre then "" else ""
| TAG_COLOR c when c = PC_m -> if pre then "" else ""
| TAG_COLOR c when c = PC_y -> if pre then "" else ""
| TAG_COLOR c when c = PC_k -> if pre then "" else ""
| TAG_COLOR c when c = PC_w -> if pre then "" else ""
in
if tags != emptytags then
let rec rgettag t =
let s2 = string_of_tag t.data in
if t.next != tags then s2 ^ rgettag t.next
else s2
in rgettag tags
else ""
in
let bpos = ref 0 in
let b = String.create bsize in (* buffer string *)
let gi = ref 0 in (* this counter represent the read in bytes*)
let rec get_buffers ts =
if ts == toks then
if !bpos > 0 then [String.sub b 0 !bpos]
else []
else
let s1 = get_tags_as_string ts.data.pretags true in
let s2 = snd ts.data.token in
let s3 = get_tags_as_string ts.data.posttags false in
let l1, l2, l3 = String.length s1, String.length s2, String.length s3 in
let ll = l1 + l2 + l3 in
if ll > bsize then
begin
let s4 = String.sub b 0 !bpos in
gi:= !gi + ll; bpos:= 0;
[s4; s1; s2; s3] @ (get_buffers ts.next)
end else
begin
let s4 = ref "" in
if ll > bsize - !bpos then
begin
s4:= String.sub b 0 !bpos;
bpos:= 0;
end;
String.blit s1 0 b !bpos l1; bpos:= !bpos + l1;
String.blit s2 0 b !bpos l2; bpos:= !bpos + l2;
String.blit s3 0 b !bpos l3; bpos:= !bpos + l3;
gi:= !gi + ll;
if !s4 <> "" then !s4 :: (get_buffers ts.next)
else (get_buffers ts.next)
end
in
let sl = get_buffers ts in
(!gi, sl)
in
let optlength, optsl = get_toks_as_string_list toks.next 4096 in
let () = prerr_endline (string_of_int optlength) in
if optlength >= maxlen then raise ImaLooser
else List.iter (function str -> output stdout str 0 (String.length str)) optsl
(* glue together *)
let main () =
let limit = try
int_of_string (Sys.getenv "TLIMIT");
with
_ -> prerr_endline "Error: Couln't read limit from Env. Limit set to 180 Secs.";
180
in
ignore(Unix.alarm (max 165 (limit - 15)));
Sys.set_signal Sys.sigalrm (Sys.Signal_handle (fun _ -> prerr_endline "Time is up!"; raise Timeout));
set_binary_mode_in stdin true;
set_binary_mode_out stdout true;
let ic = stdin in
let (filelength, sl) = (get_file_as_string_list 4096 ic) in
let () = close_in ic in
try
output_toks (reencode_smlng (optimize_smlng (interpret_smlng sl))) filelength stdout;
with
| _ -> List.iter (function str -> output stdout str 0 (String.length str)) sl
let () = Printexc.catch main ()
neu.tar 0100644 0000764 0001001 00000000000 07331022350 012620 0 ustar Administrator Kein runme 0100755 0000764 0001001 00000000035 07331021754 012414 0 ustar Administrator Kein #!/bin/sh
exec ./tooimp.opt