●

CSVをTSVへ変換するプログラム

前回はCSVデータをTSVデータに変換するプログラムを作成した。このプログラムは主に次の3つのファイルで構成されている。

util_file.cにファイル関連の処理を書いていき、util_csv.cにCSV関連の処理を書いていく、といった感じだ。main.cからこの2つの処理を呼び出して、CSVファイルをTSVデータに変換して標準出力へ出力するといった内容にしてある。

主な関数は次のとおりだ。

それぞれの実装を見ていこう。まず、util_file.cは次のようになっている。

util_file.c

#include <stdio.h>

#include <stdlib.h>

#include <sys/stat.h>

/*
 * Reads the file at `filepath` into a newly allocated, NUL-terminated buffer.
 *
 * The buffer is sized from the file size reported by stat() plus one byte for
 * the terminator, and the file is read one character at a time with fgetc().
 * Because the buffer is zero-filled by calloc(), a short read (for example
 * text-mode newline translation yielding fewer characters than the on-disk
 * size) still leaves a properly terminated string.
 *
 * Returns the buffer on success; the caller owns it and releases it with
 * free(). Returns NULL if `filepath` is NULL, the file cannot be stat'ed or
 * opened, or memory cannot be allocated.
 */
char *file2str(const char *filepath) {
    struct stat st;
    size_t filesize;
    char *buf;
    FILE *fp;

    if (NULL == filepath || 0 != stat(filepath, &st) || st.st_size < 0) {
        return NULL;
    }
    filesize = (size_t)st.st_size;

    fp = fopen(filepath, "r");
    if (NULL == fp) {
        return NULL;
    }

    buf = calloc(filesize + 1, sizeof(char));
    if (NULL != buf) {
        char *p = buf;

        for (size_t i = 0; i < filesize; i++) {
            int c = fgetc(fp);

            if (EOF == c) {
                break;
            }
            *p = (char)c;
            ++p;
        }
    }

    // Close on every path once the stream was opened, including OOM.
    fclose(fp);

    return buf;
}

この実装に関する説明は不要だと思う。file2str()では引数にパスを取っており、これをfopen(3)ライブラリ関数で開いてfgetc()で1文字ずつ読み込んでいるだけだ。指定されたファイルの中身をchar型のデータとしてすべてメモリへコピーしている。

CSVデータをTSVデータへ変換する実装であるutil_csv.cは次のようになっている。

util_csv.c

#include <stdbool.h>

/*
 * Maps one character of CSV field content to what may appear inside a TSV
 * field. A TSV field cannot carry a tab (field separator) or a newline
 * (record separator), so both are replaced by a single space; every other
 * character is passed through unchanged.
 */
static char gettsvchar(const char c) {
    if ('\t' == c || '\n' == c) {
        return ' ';
    }
    return c;
}

/*
 * Converts CSV text to TSV text.
 *
 * ibuf/ibufsize : CSV input and its length in bytes (need not be
 *                 NUL-terminated).
 * obuf/obufsize : destination buffer and its capacity in bytes.
 *
 * Returns the number of bytes written to obuf. The output is NOT
 * NUL-terminated by this function. Each input byte produces at most one
 * output byte, so a buffer of ibufsize bytes is always large enough; if obuf
 * is smaller, conversion stops once it is full.
 *
 * Parsing rules:
 *  - ',' outside quotes ends a field and becomes '\t'.
 *  - '\n' outside quotes ends a record and is copied as '\n'.
 *  - A '"' at the start of a field opens a quoted field; inside it, '""' is
 *    an escaped quote, ',' is literal, and a line break is field content
 *    (emitted as a space, see gettsvchar()).
 *  - A lone '"' inside a quoted field closes it; anything up to the next
 *    ',' or '\n' is then treated as ordinary field content.
 */
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize) {
    // When the target is empty or there is nowhere to write, do nothing.
    if (!ibuf || !obuf || ibufsize <= 0 || obufsize <= 0) {
        return 0;
    }

    // Indicates the state during parsing.
    typedef enum FIELD_STATUS {
        FIELD_END,       // at the start of a field (nothing consumed yet)
        IN_FIELD,        // inside an unquoted field, or after a closing quote
        IN_QUOTED_FIELD  // between an opening and a closing double quote
    } record_status;

    record_status rs = FIELD_END;
    int tsv_len = 0;

    for (int i = 0; i < ibufsize && tsv_len < obufsize; i++) {
        const char c = ibuf[i];
        char out = '\0';
        int emit = 1;

        if (IN_QUOTED_FIELD == rs) {
            if ('"' != c) {
                out = gettsvchar(c);
            } else if (i + 1 < ibufsize && '"' == ibuf[i + 1]) {
                // Escaped quote: "" stands for one literal quote.
                out = '"';
                ++i;
            } else {
                // Closing quote: consumed silently; the following ',' or
                // '\n' (if any) terminates the field in the normal way.
                rs = IN_FIELD;
                emit = 0;
            }
        } else if ('\n' == c) {
            out = '\n';
            rs = FIELD_END;
        } else if (',' == c) {
            out = '\t';
            rs = FIELD_END;
        } else if ('"' == c && FIELD_END == rs) {
            // Opening quote is only special at the very start of a field.
            rs = IN_QUOTED_FIELD;
            emit = 0;
        } else {
            out = gettsvchar(c);
            rs = IN_FIELD;
        }

        if (emit) {
            obuf[tsv_len] = out;
            ++tsv_len;
        }
    }

    return tsv_len;
}

前回に説明したCSVとTSVの仕様に従って、CSVデータをTSVデータへ変換する処理をシンプルに書いていくとこんな感じになる。別段、高速化などを意識した実装はしていない。enum FIELD_STATUSという列挙型が、現在パースしているデータがフィールド内にあるか、クォートされたフィールド内にあるか、フィールドの終了か、を保持しているので、このあたりを意識しながら読んでいけば処理のロジックは見えてくると思う。

これら2つのファイルで実装されている2つの関数は、次のようにヘッダファイルに定義しておく。

main.h

#ifndef MAIN_H
#define MAIN_H

/*
 * Converts the CSV text in ibuf (ibufsize bytes) to TSV, writing at most
 * obufsize bytes to obuf. Returns the number of bytes written; the output is
 * not NUL-terminated by the function.
 */
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize);

/*
 * Reads the file at filepath into a heap buffer allocated with calloc() and
 * returns it as a NUL-terminated string. The caller owns the buffer.
 */
char *file2str(const char *filepath);

#endif /* MAIN_H */

main.cは次のようにfile2str()とcsv2tsv()を呼び出して処理を行うだけだ。

main.c

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include "main.h"

/*
 * Entry point: reads the CSV file named by argv[1], converts it to TSV and
 * prints the result to standard output.
 *
 * Returns 0 on success and 1 when the argument is missing, the file cannot be
 * read, or memory cannot be allocated.
 */
int main(int argc, char *argv[]) {
    char *csvdata, *tsvdata;
    int csvdata_bytes, tsvdata_bytes;

    if (argc < 2) {
        fprintf(stderr, "usage: %s file.csv\n", argc > 0 ? argv[0] : "csv2tsv");
        return 1;
    }

    csvdata = file2str(argv[1]);
    if (NULL == csvdata) {
        fprintf(stderr, "cannot read %s\n", argv[1]);
        return 1;
    }

    csvdata_bytes = (int)strlen(csvdata);

    // csv2tsv() emits at most one byte per input byte, so a buffer as large
    // as the input always suffices; the extra zeroed byte keeps the result
    // NUL-terminated for printf().
    tsvdata_bytes = csvdata_bytes;
    tsvdata = calloc(tsvdata_bytes + 1, sizeof(char));
    if (NULL == tsvdata) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    csv2tsv(csvdata, csvdata_bytes, tsvdata, tsvdata_bytes);

    printf("%s", tsvdata);

    // The buffers are intentionally left for the OS to reclaim at exit,
    // as in the original listing.
    return 0;
}

この実装では、file2str()の中で確保したメモリが解放されていない。実装の見通しの良さを考えると、file2str()はちょっとばかり改良の余地があるのだが、とりあえず動作させることを優先して、こうした実装にしてある。

●

Windows APIを使うのか、POSIXに寄せるのか

先に紹介した実装はPOSIXに寄せたコードになっている。詳しい説明はいずれ行うとして、ここではPOSIXはUNIX系のオペレーティングシステムでよく使われているAPI集のようなものと考えていただきたい。POSIXに寄せて書いておけば、Windowsで動いたプログラムがMacでもLinuxでもコンパイルして利用できるようにしやすい。Windowsだけで動けばよいということであれば、このようなソースコードではなく、Windows API (Win32 API)を使ってソースコードを書くほうがWindows的だ。

この辺りは、開発者が何を求めているかで採用するものが変わってくる。Windowsだけで動けばよく、将来的にMacやLinuxに移植することはまずないのなら、Windows APIなどのMicrosoftが提供しているAPIを使って実装するとよいだろう。Windowsのための機能だ。

一方、Windowsで開発するがLinuxでも実行できる必要がある、といった場合は、UNIX系オペレーティングシステムの多くが準拠または準拠に近い状態にあるPOSIXに倣った実装をしておくとよい。先ほどのソースコードがそれに近い。C言語の教科書的な書籍や、C言語の演習などで最初に習うソースコードはPOSIXコードを使っていることが多いだろう。

組み込みになると話はまた変わり、組み込み用のSDKが提供している関数を使ったコーディングを行うことになる。結局のところ、プラットフォームや製品ごとに使うべき関数は異なるので、その場その場に合わせて関数を理解して使うしかない。

WindowsでPOSIX系の実装を行う場合、現状はWSL (Windows Subsystem for Linux)と呼ばれる機能を使うのが公式となるだろう。しかし、WindowsでPOSIX系の実装を行う方法はほかにもいくつかある。Windows APIやPOSIX系の実装に関してはいずれもっと詳しく説明する予定だ。まずは作ったものを動かしてみよう。

書き換えたMakefile

複数のソースコードをビルドする方法として、makeを紹介した。最もシンプルな書き方から取り上げて、徐々に実際に使われるような書き方に変えていったが、次に掲載するMakefileが今回のバージョンだ。

Makefile

# Name of the executable to build.
CMD= csv2tsv.exe

# Every .c file in this directory is compiled and linked (GNU make wildcard).
SRCS= $(wildcard *.c)

OBJS= $(SRCS:.c=.o)

CC= clang

CFLAGS+=-g

# build and clean are commands, not files.
.PHONY: build clean

build: $(CMD)

$(CMD): $(OBJS)
	$(CC) $(CFLAGS) -o $(CMD) $(OBJS)

# Pass CFLAGS to the compile step as well, so that -g (and any flags given
# on the command line) reach the object files and not just the link.
.c.o:
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(CMD)
	rm -f $(OBJS)
	rm -f *.ilk
	rm -f *.pdb

前回まではSRCSにCソースコードファイルの名前を直接書いていたが、今回はここを$(wildcard *.c)という記述に変えてある。GNU makeの機能を使って、.cという拡張子のファイルを自動的に指定するといった処理になっている。

こんな感じの書き方にしておくと、他のソフトウェアを開発する際に流用しやすい(当然すべてが流用できるわけではないし、限度もあるが)。今回と似たような構成であれば、CMDに指定するバイナリ名だけを書き換えれば使えるといった感じだ。実際にはもっと多くのオプションを編集することになるのだが、とっかかりとしてはこの程度でよいだろう。

ビルドして使ってみる

では、早速ビルドして使ってみよう。当たり前だが、次のようにCSVファイルをTSVデータへ変換できることを確認できるはずだ。

csv2tsv.exeの実行サンプル


(これまでに構築した環境でビルドすると、実際にはコンパイル時にワーニングが出るのだが、見にくくなるので今回はそこは排除した。この点もいずれ取り上げる)

こんな感じで比較的簡単に単機能のコマンドは開発できる。Cで開発すると意識しなくても、高速に動作するプログラムが出来上がりやすい。慣れないとポインタや参照、メモリの使い方に難儀するとは思うが、使えるようになっておくとつぶしが効きやすいプログラミング言語なのだ。

○参考

RFC4180 - Common Format and MIME Type for Comma-Separated Values (CSV) Files

Definition of tab-separated-values (tsv), Internet Assigned Numbers Authority