CSV形式ファイルのソート

説明

外部ファイルのソート」と「CSV形式のファイルを扱う」と内容がかなり重複しているが一応載せます。目新しいのはpickup-csvで、CSV形式ファイルから特定のフィールド(複数指定可)を抽出してファイルに書き出します。

ELIS復活祭

[戻る]

実行例

Tao>(pickup-csv "infile.csv" "field246.txt" '(2,4,6))
[戻る]

ソースコード


;
;  csv形式ファイルのソート  csvsort.tao
;
;  TAO mode
;  IDATEN/K.Takeshita
;

(de csv2list( str &aux p )
    ( if ( null (!p (smemq-case "," str)) ) (list str)
     ( cons (substring str 0 p) (csv2list (substring str (inc p))) )
     ))
(de list2csv( l )
    ( if ( eq (length l) 1 ) (car l)
     ( sconc (car l) "," (list2csv (cdr l)))
     ))
(de csv-to-list( fp )
    ( loop (&aux s c) (:init (!c 'nil))
     (if (eq (!s (read-line fp)) :eof) (return c)
	 (!c (cons (csv2list s) c) )
	 )
     ))
(de list-to-csv( fp li )
    ( mapc (lambda (x) (write-line (list2csv x) fp)) li )
    )
(de csv-to-list-n1( fp p)
    ( loop (&aux s c a) (:init (!c 'nil))
     (if (eq (!s (read-line fp)) :eof) (return c))
     (!a (csv2list s))
     (!c (cons (cons (nth p a) a) c))
     ))
(de list-to-csv-cdr( fp li )
    ( mapc (lambda (x) (write-line (list2csv (cdr x)) fp)) li )
    )

(de csvsort1( infile outfile p &aux in out pp)
    (!func (if (minusp p) 'string-greaterp 'string-lessp))
    (!pp (inc (abs p)))
    (with-open-file (in infile)
      (with-open-file (out outfile :direction :output)
	(list-to-csv-cdr out (sortcar (csv-to-list-n1 in pp) func) )
	)))

;
;  linesort 
;  91/02/19 by Idaten
;
(de linesort( infile outfile &aux in out n )
    (!n nil)
    (with-open-file (in infile)
      (loop (&aux a)
	    (if (eq (!a (read-line in)) :eof) (exit-loop) )
	    (!n (cons a n))
	    )
      )
    (with-open-file (out outfile :direction :output)
      (mapc (lambda (x) (write-line x out)) (sort n #'string-lessp ) )
      )
    )

;
;  rotate-l 
;
(de rotate-l (x)
    (if (listp x)
	(nconc (cdr x) (list (car x)) )
        x
    )
)

;
;  makerun  run-factor infile   outfiles infile.#run#.ver
;  91/02/15

(de makerun ( infile &optn run-factor )
    ( if (integerp run-factor) (!factor run-factor) (!factor 10000) ) 
    (with-open-file (in infile)
        (loop (&aux r fp vers outfile ) (:init (!vers 1))
	      (!outfile (namestring (make-pathname :name (pathname-name infile) :type "#run#" :version vers )) )
	      (if (null (!r (read-pool in factor ))) (return) )
	      (!fp (open outfile :direction :output :if-exists :error :if-does-not-exist :create))
	      (list-to-csv fp (sortcar r #'string-lessp) )
	      (close fp)
	      (inc vers)
	      (set-keep-generation-count outfile vers)
	)
    )
)

;
;  read-pool poolに最大count行読み込む
;
(de read-pool ( fp count )
    ( loop (&aux ct s pool) (:init (!ct 0) (!pool nil) )
          (:while (< ct count) (return pool) )
	  (if (eq (!s (read-line fp)) :eof) (return pool) ) 
	  (inc ct)
          (!pool (cons (csv2list s) pool) )
    )
)

;
;  read-pool-line poolに最大count行読み込む
;  91/02/20

(de read-pool-line ( fp count )
    ( loop (&aux ct s pool) (:init (!ct 0) (!pool nil) )
     (:while (< ct count) (return pool) )
     (if (eq (!s (read-line fp)) :eof) (return pool) )
     (inc ct)
     (!pool (cons s pool))
     )
    )

;
;  lmakerun 指定した入力ファイルからランを作る 
;  91/02/20

(de lmakerun ( infiles runname &optn run-factor &aux factor in vers)
    ( if (integerp run-factor) (!factor run-factor) (!factor 20000) )
    ( if (atom infiles) (!infiles (list infiles)) )
    (!vers 1)
    ( mapc (lambda (x)
	       ( with-open-file (in x)
		(loop (&aux r fp outfile)
		      (if (null (!r (read-pool-line in factor))) (return))
		      (!outfile (namestring (make-pathname :name runname :type "#run#" :version vers)) )
		      (!fp (open outfile :direction :output ))
		      (mapc (lambda (x) (write-line x fp)) (sort r #'string-lessp))
		      (close fp)
		      (inc vers)
		      (set-keep-generation-count outfile vers)
		      )
		)
	       ) infiles )
    )

;
;  lmergerun 複数のランをマージする
;  91/02/21

(de lmergerun ( outfile runname &aux in out heap)
    (!in (mapcar #'open (all-files (namestring (make-pathname :name runname :type "#run#" )) nil)))
    (!heap (mapcar (lambda (x) (list (read-line x) x)) in))
    (with-open-file (out outfile :direction :output)
      (loop ;(&aux n m h)
           (if (null heap) (return) )
	   (!n (car (!h (sortcar heap #'string-lessp)) ) )  ; 書き出すデータ
	   (write-line (car n) out)
	   (if (eq (!m (read-line (second n) )) :eof)
	       (progn (!heap (cdr h)) (close (second n)))
	       (!heap (cons (list m (second n)) (cdr h)))
	   )
       )
     )
)

;
;  makerun1 単一ファイルから読んだデータをランに分ける
;
(de makerun1 ( factor infile outfiles &aux oc )
    (!oc (length outfiles))
    (with-open-file (in infile)
        (loop (&aux n r fp) (:init (!n 0))
	      (if (null (!r (read-pool in factor ))) (return) )
	      (!fp (open (nth n outfiles) :direction :output :if-exists :append :if-does-not-exist :create))
	      (write (list (length r) infile) fp )
	      (mapc (lambda (x) (write x fp)) (sortcar r #'string-lessp) )
	      (close fp)
	      (inc n)
	      (if (>= n oc) (!n 0))

	)
    )
)


;
;  makerun2 単一ファイルから読んだデータをランに分ける
;
(de makerun2 ( factor infile outfiles )
    (with-open-file (in infile)
        (loop (&aux r fp cy ) (:init (!cy outfiles))
	      (if (null (!r (read-pool in factor ))) (return) )
	      (!fp (open (car cy) :direction :output :if-exists :append :if-does-not-exist :create))
	      (write (list (length r) infile) fp )
	      (mapc (lambda (x) (write x fp)) (sortcar r #'string-lessp) )
	      (close fp)
	      (!cy (rotate-l cy))
	)
    )
)


;
;  marge-run1  ランをマージする(1パス) 
;
(de merge-run1 ( infile1 infile2 outfile &aux in1 in2 out a b )
    (progn 
    (!in1 (open infile1))
    (!in2 (open infile2))
    ( with-open-file (out outfile :direction :output )
         (progn
	     (!a (read-line in1))
	     (!b (read-line in2))
	     (loop
	         (if (and (eq a :eof) (eq b :eof)) (return nil) )
		 (if (eq a :eof) (progn ( write-line b out) (!b (read-line in2)) (cycle) ))
		 (if (eq b :eof) (progn ( write-line a out) (!a (read-line in1)) (cycle) ))
	         (if (string-lessp (car (csv2list a)) (car (csv2list b)))
		     (progn ( write-line a out ) (!a (read-line in1)) )
		     (progn ( write-line b out ) (!b (read-line in2)) )
	         )
	     )
	 )
    )
    (close in1)
    (close in2)
    )
)

;
;  pickup-csv csv形式のテキストから特定のフィールドを抽出する
;  91/02/23

(de pickup-csv ( infile outfile fields)
    (if (atom fields) (!fields (list fields)))
    (with-open-file (in infile)
      (with-open-file (out outfile :direction :output)
	(loop (&aux n) (:init (!n nil))
	      (if (eq (!n (read-line in)) :eof) (return nil))
	      (write-line (list2csv (mapcar (lambda (x) (nth x (csv2list n))) fields) ) out)
	      )
	)
      )
    )


[戻る]
inserted by FC2 system