TAOにはソートする関数があるが、オンメモリでしかできない。借用したマシンは512KBしかなかった。ディスク上にある50MB程度のテキストファイルをソートするために、元ファイルをメモリ内に収まるよう分割してソートし、最後にそれらをマージソートして出力する。ソース中には試行錯誤の跡がある。また、poolなどグローバル変数を使っている。わかりづらい。多分、途中まではマージソートを手作業でやって確認してたのだろう。なおcsv関係の関数はおまけです。
ソースファイルをmorememoryで確保できた領域に読める分だけ読み込み、ランという単位に分けてソートし出力する。次に、複数のランをマージソートして一本のファイルにする。
Tao>(lmakerun "infile1.txt" "infile2.txt" "infile3.txt" "runfile") Tao>(lmergerun "sorted.txt" "runfile")
; [es24]bs:exsort.tao.61, 20-Feb-91 21:35:18, Edit by Idaten
;
; 外部ファイルのソート exsort.tao
;
; TAO mode
; IDATEN/K.Takeshita
; 91/01/07 ( csv2list csv-to-list list2csv )
; 91/01/08 ( list-to-csv fsort )
;
(de csv2list( str &aux p )
( if ( null (!p (smemq-case "," str)) ) (list str)
( cons (substring str 0 p) (csv2list (substring str (+ p 1))) )
)
)
(de csv-to-list ( fp )
( loop (&aux s c) (:init (!c 'nil))
(if (eq (!s (read-line fp)) :eof) (return c)
(!c (cons (csv2list s) c) )
)
)
)
(de list2csv( l )
( if ( eq (length l) 1 ) (car l)
( string-append (car l) "," (list2csv (cdr l)))
)
)
(de list-to-csv( fp li )
( mapc (lambda (x) (write-line (list2csv x) fp)) li )
)
(de fsort( func infile outfile )
(let ( (in (open infile)) (out (open outfile :direction :output)) )
(list-to-csv out (sortcar (csv-to-list in) func ))
(close in) (close out)
)
)
;
; linesort
; 91/02/19 by Idaten
;
(de linesort( infile outfile &aux in out n )
(!n nil)
(with-open-file (in infile)
(loop (&aux a)
(if (eq (!a (read-line in)) :eof) (exit-loop) )
(!n (cons a n))
)
)
(with-open-file (out outfile :direction :output)
(mapc (lambda (x) (write-line x out)) (sort n #'string-lessp ) )
)
)
;
; rotate-l
;
(de rotate-l (x)
(if (listp x)
(nconc (cdr x) (list (car x)) )
x
)
)
;
; makerun run-factor infile outfiles infile.#run#.ver
; 91/02/15
(de makerun ( infile &optn run-factor )
( if (integerp run-factor) (!factor run-factor) (!factor 10000) )
(with-open-file (in infile)
(loop (&aux r fp vers outfile ) (:init (!vers 1))
(!outfile (namestring (make-pathname :name (pathname-name infile) :type "#run#" :version vers )) )
(if (null (!r (read-pool in factor ))) (return) )
(!fp (open outfile :direction :output :if-exists :error :if-does-not-exist :create))
(list-to-csv fp (sortcar r #'string-lessp) )
(close fp)
(inc vers)
(set-keep-generation-count outfile vers)
)
)
)
;
; read-pool poolに最大count行読み込む
;
(de read-pool ( fp count )
( loop (&aux ct s pool) (:init (!ct 0) (!pool nil) )
(:while (< ct count) (return pool) )
(if (eq (!s (read-line fp)) :eof) (return pool) )
(inc ct)
(!pool (cons (csv2list s) pool) )
)
)
;
; read-pool-line poolに最大count行読み込む
; 91/02/20
(de read-pool-line ( fp count )
( loop (&aux ct s pool) (:init (!ct 0) (!pool nil) )
(:while (< ct count) (return pool) )
(if (eq (!s (read-line fp)) :eof) (return pool) )
(inc ct)
(!pool (cons s pool))
)
)
;
; lmakerun 指定した入力ファイルからランを作る
; 91/02/20
(de lmakerun ( infiles runname &optn run-factor &aux factor in vers)
( if (integerp run-factor) (!factor run-factor) (!factor 20000) )
( if (atom infiles) (!infiles (list infiles)) )
(!vers 1)
( mapc (lambda (x)
( with-open-file (in x)
(loop (&aux r fp outfile)
(if (null (!r (read-pool-line in factor))) (return))
(!outfile (namestring (make-pathname :name runname :type "#run#" :version vers)) )
(!fp (open outfile :direction :output ))
(mapc (lambda (x) (write-line x fp)) (sort r #'string-lessp))
(close fp)
(inc vers)
(set-keep-generation-count outfile vers)
)
)
) infiles )
)
;
; lmergerun 複数のランをマージする
; 91/02/21
(de lmergerun ( outfile runname &aux in out heap)
(!in (mapcar #'open (all-files (namestring (make-pathname :name runname :type "#run#" )) nil)))
(!heap (mapcar (lambda (x) (list (read-line x) x)) in))
(with-open-file (out outfile :direction :output)
(loop ;(&aux n m h)
(if (null heap) (return) )
(!n (car (!h (sortcar heap #'string-lessp)) ) ) ; 書き出すデータ
(write-line (car n) out)
(if (eq (!m (read-line (second n) )) :eof)
(progn (!heap (cdr h)) (close (second n)))
(!heap (cons (list m (second n)) (cdr h)))
)
)
)
)
;
; makerun1 単一ファイルから読んだデータをランに分ける
;
(de makerun1 ( factor infile outfiles &aux oc )
(!oc (length outfiles))
(with-open-file (in infile)
(loop (&aux n r fp) (:init (!n 0))
(if (null (!r (read-pool in factor ))) (return) )
(!fp (open (nth n outfiles) :direction :output :if-exists :append :if-does-not-exist :create))
(write (list (length r) infile) fp )
(mapc (lambda (x) (write x fp)) (sortcar r #'string-lessp) )
(close fp)
(inc n)
(if (>= n oc) (!n 0))
)
)
)
;
; makerun2 単一ファイルから読んだデータをランに分ける
;
(de makerun2 ( factor infile outfiles )
(with-open-file (in infile)
(loop (&aux r fp cy ) (:init (!cy outfiles))
(if (null (!r (read-pool in factor ))) (return) )
(!fp (open (car cy) :direction :output :if-exists :append :if-does-not-exist :create))
(write (list (length r) infile) fp )
(mapc (lambda (x) (write x fp)) (sortcar r #'string-lessp) )
(close fp)
(!cy (rotate-l cy))
)
)
)
;
; marge-run1 ランをマージする(1パス)
;
(de merge-run1 ( infile1 infile2 outfile &aux in1 in2 out a b )
(progn
(!in1 (open infile1))
(!in2 (open infile2))
( with-open-file (out outfile :direction :output )
(progn
(!a (read-line in1))
(!b (read-line in2))
(loop
(if (and (eq a :eof) (eq b :eof)) (return nil) )
(if (eq a :eof) (progn ( write-line b out) (!b (read-line in2)) (cycle) ))
(if (eq b :eof) (progn ( write-line a out) (!a (read-line in1)) (cycle) ))
(if (string-lessp (car (csv2list a)) (car (csv2list b)))
(progn ( write-line a out ) (!a (read-line in1)) )
(progn ( write-line b out ) (!b (read-line in2)) )
)
)
)
)
(close in1)
(close in2)
)
)
;
; pickup-csv csv形式のテキストから特定のフィールドを抽出する
; 91/02/23
(de pickup-csv ( infile outfile fields)
(if (atom fields) (!fields (list fields)))
(with-open-file (in infile)
(with-open-file (out outfile :direction :output)
(loop (&aux n) (:init (!n nil))
(if (eq (!n (read-line in)) :eof) (return nil))
(write-line (list2csv (mapcar (lambda (x) (nth x (csv2list n))) fields) ) out)
)
)
)
)