Windows 10でC言語開発をしよう!　CSVをTSVへ変換するプログラムを使う
●
CSVをTSVへ変換するプログラム
前回はCSVデータをTSVデータに変換するプログラムを作成した。このプログラムは主に次の3つのファイルで構成されている。
util_file.cにファイル関連の処理を書いていき、util_csv.cにCSV関連の処理を書いていく、といった感じだ。main.cからこの2つの処理を呼び出して、CSVファイルをTSVデータに変換して標準出力で出力するといった内容にしてある。
主な関数は次のとおりだ。
それぞれの実装を見ていこう。まず、util_file.cは次のようになっている。
util_file.c
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
// Reads the file at `filepath` into a newly allocated, NUL-terminated buffer
// and returns it.
//
// The buffer is sized from the st_size reported by stat(). The file is opened
// in text mode ("r") and read one character at a time with fgetc(); reading
// stops after st_size characters, at end-of-file, or on a read error, so the
// result may be shorter than the reported size.
//
// Returns NULL when `filepath` is NULL, when stat() or fopen() fails, or when
// the buffer cannot be allocated. The caller owns the returned buffer and is
// responsible for releasing it with free().
char *file2str(const char *filepath) {
    struct stat st;
    size_t filesize;
    char *buf;
    FILE *fp;

    // Without a successful stat() the size is meaningless, so give up early.
    if (NULL == filepath || 0 != stat(filepath, &st) || st.st_size < 0) {
        return NULL;
    }
    filesize = (size_t)st.st_size;

    fp = fopen(filepath, "r");
    if (NULL == fp) {
        return NULL;
    }

    // One extra byte keeps the result NUL-terminated; calloc() zero-fills it.
    buf = calloc(filesize + 1, sizeof(char));
    if (NULL != buf) {
        char *p = buf;
        for (size_t i = 0; i < filesize; i++) {
            int c = fgetc(fp);
            if (EOF == c) {
                break;
            }
            *p = (char)c;
            ++p;
        }
    }

    // Close the stream on every path that opened it.
    fclose(fp);
    return buf;
}
この実装に関する説明は不要だと思う。file2str()では引数にパスを取っており、これをfopen(2)システムコールで開いてfgetc()で1文字づつ読み込んでいるだけだ。指定されたファイルの中身をchar型のデータとしてすべてメモリへコピーしている。
CSVデータをTSVデータへ変換する実装であるutil_csv.cは次のようになっている。
util_csv.c
#include <stdbool.h>
static char gettsvchar(const char);

// Converts CSV text to TSV text.
//
// Reads ibufsize bytes starting at ibuf and stores the converted text in
// obuf, never storing more than obufsize bytes. Field-separating commas
// become tabs, the double quotes surrounding a quoted field are dropped, a
// doubled quote ("") inside a quoted field becomes a single quote, and a tab
// inside a field is replaced by a space. A '\n' always ends the current
// record, even inside a quoted field.
//
// The output is not NUL-terminated by this function. Returns the number of
// bytes stored in obuf, or 0 when either buffer is NULL or either size is not
// positive.
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize) {
    // Nothing to read, or nowhere to write: no processing is done.
    if (!ibuf || !obuf || ibufsize <= 0 || obufsize <= 0)
        return 0;

    // Indicates the state during parsing.
    typedef enum FIELD_STATUS {
        FIELD_END,       // at the start of a record or just past a separator
        IN_FIELD,        // inside an unquoted field
        IN_QUOTED_FIELD  // inside a double-quoted field
    } record_status;

    const char *p_i = ibuf;
    const char *end_i = &ibuf[ibufsize - 1];
    char *p_o = obuf;
    int tsv_len = 0;
    record_status rs = FIELD_END;

    while (1) {
        const char c = *p_i;
        int emit = 1;  // whether this step stores a byte in obuf
        char out = c;  // the byte to store when emit is set

        if ('\n' == c) {
            // A newline ends the record regardless of the current state.
            rs = FIELD_END;
            out = '\n';
        } else {
            switch (rs) {
            case FIELD_END:
                if (',' == c) {
                    // Empty field: only the separator is written.
                    out = '\t';
                } else if ('"' == c) {
                    // The opening quote is not copied to the output.
                    rs = IN_QUOTED_FIELD;
                    emit = 0;
                } else {
                    rs = IN_FIELD;
                    out = gettsvchar(c);
                }
                break;
            case IN_FIELD:
                if (',' == c) {
                    rs = FIELD_END;
                    out = '\t';
                } else {
                    out = gettsvchar(c);
                }
                break;
            case IN_QUOTED_FIELD:
                if ('"' != c) {
                    out = gettsvchar(c);
                } else if (p_i == end_i) {
                    // The closing quote is the last input byte. The field
                    // ends with the input, so no separator follows it.
                    rs = FIELD_END;
                    emit = 0;
                } else if (',' == p_i[1]) {
                    // Closing quote followed by a separator: consume both.
                    rs = FIELD_END;
                    ++p_i;
                    out = '\t';
                } else if ('"' == p_i[1]) {
                    // Doubled quote: write one quote and skip the second.
                    out = '"';
                    ++p_i;
                } else {
                    // A quote followed by anything else (for example '\n')
                    // is dropped; the state is left unchanged.
                    emit = 0;
                }
                break;
            }
        }

        if (emit) {
            *p_o = out;
            ++p_o;
            ++tsv_len;
        }

        // Each step stores at most one byte, so this check keeps the output
        // within obufsize.
        if (p_i == end_i || tsv_len == obufsize)
            break;
        ++p_i;
    }
    return tsv_len;
}

// Maps a field character to its TSV form: a tab cannot appear inside a TSV
// field, so it is replaced by a space; every other character is unchanged.
static char gettsvchar(const char c) {
    return ('\t' == c) ? ' ' : c;
}
前回に説明したCSVとTSVの仕様に従って、CSVデータをTSVデータへ変換する処理をシンプルに書いていくとこんな感じになる。別段、高速化などを意識した実装はしていない。enum FIELD_STATUSという列挙型が、現在パースしているデータがフィールド内にあるか、クォートされたフィールド内にあるか、フィールドの終了か、を保持しているので、このあたりを意識しながら読んでいけば処理のロジックは見えてくると思う。
これら2つのファイルで実装されている2つの関数は、次のようにヘッダファイルに定義しておく。
main.h
#ifndef MAIN_H
#define MAIN_H

// Converts the ibufsize bytes of CSV text in ibuf to TSV text stored in obuf
// (capacity obufsize bytes) and returns the number of bytes stored.
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize);

// Returns a newly allocated, NUL-terminated buffer holding the contents of
// the file at filepath.
char *file2str(const char *filepath);

#endif
main.cは次のようにfile2str()とcsv2tsv()を呼び出して処理を行うだけだ。
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "main.h"
// Entry point: converts the CSV file named by argv[1] to TSV and writes the
// result to standard output.
//
// Returns 0 on success, and 1 when no file argument is given, the file cannot
// be read, or the output buffer cannot be allocated.
int main(int argc, char *argv[]) {
    char *csvdata, *tsvdata;
    int csvdata_bytes, tsvdata_bytes;

    // argv[1] is NULL when no argument is given; refuse instead of using it.
    if (argc < 2) {
        fprintf(stderr, "usage: csv2tsv <file.csv>\n");
        return 1;
    }

    csvdata = file2str(argv[1]);
    if (NULL == csvdata) {
        fprintf(stderr, "csv2tsv: cannot read %s\n", argv[1]);
        return 1;
    }
    csvdata_bytes = (int)strlen(csvdata);

    // The output buffer is sized to the input length, plus one zero byte
    // (from calloc) so the result can be printed as a C string.
    tsvdata_bytes = csvdata_bytes;
    tsvdata = calloc(tsvdata_bytes + 1, sizeof(char));
    if (NULL == tsvdata) {
        fprintf(stderr, "csv2tsv: out of memory\n");
        return 1;
    }

    csv2tsv(csvdata, csvdata_bytes, tsvdata, tsvdata_bytes);
    printf("%s", tsvdata);

    // Neither buffer is freed explicitly; both live until the process exits.
    return 0;
}
この実装では、file2str()の中で確保したメモリが開放されていない。実装の見通しの良さを考えると、file2str()はちょっとばかり改良の余地があるのだが、とりあえず動作させることを優先して、こうした実装にしてある。
●
Windows APIを使うのか、POSIXに寄せるのか
先に紹介した実装はPOSIXに寄せたコードになっている。詳しい説明はいずれ行うとして、ここではPOSIXはUNIX系のオペレーティングシステムでよく使われているAPI集のようなものと考えていただきたい。POSIXに寄せて書いておけば、Windowsで動いたプログラムがMacでもLinuxでもコンパイルして利用できるようにしやすい。Windowsだけで動けばよいということであれば、このようなソースコードではなく、Windows API (Win32 API)を使ってソースコードを書くほうがWindows的だ。
この辺りは、開発者が何を求めているかで採用するものが変わってくる。Windowsだけで動けばよく、将来的にMacやLinuxに移植することはまずないのなら、Windows APIなどのMicrosoftが提供しているAPIを使って実装するとよいだろう。Windowsのための機能だ。
一方、Windowsで開発するがLinuxでも実行できる必要がある、といった場合は、UNIX系オペレーティングシステムの多くが準拠または準拠に近い状態にあるPOSIXに習った実装をしておくとよい。先ほどのソースコードがそれに近い。C言語の教科書的な書籍や、C言語の演習などで最初に習うソースコードはPOSIXコードを使っていることが多いだろう。
組み込みになると話はまた変わり、組み込み用のSDKが提供している関数を使ったコーディングを行うことになる。結局のところ、プラットフォームや製品ごとに使うべき関数は異なるので、その場その場に合わせて関数を理解して使うしかない。
WindowsでPOSIX系の実装を行う場合、現状はWSL (Windows Subsystem for Linux)と呼ばれる機能を使うのが公式となるだろう。しかし、WindowsでPOSIX系の実装を行う方法はほかにもいくつかある。Windows APIやPOSIX系の実装に関してはいずれもっと詳しく説明する予定だ。まずは作ったものを動かしてみよう。
書き換えたMakefile
複数のソースコードをビルドする方法として、makeを紹介した。最もシンプルな書き方から取り上げて、徐々に実際に使われるような書き方に変えていったが、次に掲載するMakefileが今回のバージョンだ。
Makefile
# Builds csv2tsv.exe from every C source file in this directory (GNU make).
CMD= csv2tsv.exe
SRCS= $(wildcard *.c)
OBJS= $(SRCS:.c=.o)
CC= clang
CFLAGS+=-g

# build and clean name actions, not files.
.PHONY: build clean

build: $(CMD)

$(CMD): $(OBJS)
	$(CC) $(CFLAGS) -o $(CMD) $(OBJS)

# CFLAGS is passed at compile time too, so -g applies to each object file.
.c.o:
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(CMD)
	rm -f $(OBJS)
	rm -f *.ilk
	rm -f *.pdb
前回まではSRCSにCソースコードファイルの名前を直接書いていたが、今回はここを$(wildcard *.c)という記述に変えてある。GNU makeの機能を使って、.cという拡張子のファイルを自動的に指定するといった処理になっている。
こんな感じの書き方にしておくと、他のソフトウェアを開発する際に流用しやすい(当然すべてが流用できるわけではないし、限度もあるが)。今回と似たような構成であれば、CMDに指定するバイナリ名だけを書き換えれば使えるといった感じだ。実際にはもっと多くのオプションを編集することになるのだが、とっかかりとしてはこの程度でよいだろう。
ビルドして使ってみる
では、早速ビルドして使ってみよう。当たり前だが、次のようにCSVファイルをTSVデータへ変換できることを確認できるはずだ。
(これまでに構築した環境でビルドすると、実際にはコンパイル時にワーニングが出るのだが、見にくくなるので今回はそこは排除した。この点もいずれ取り上げる)
こんな感じで比較的簡単に単機能のコマンドは開発できる。Cで開発すると意識しなくても、高速に動作するプログラムが出来上がりやすい。慣れないとポインタや参照、メモリの扱いに難儀するとは思うが、使えるようになっておくとつぶしが効きやすいプログラミング言語なのだ。
○参考
RFC4180 - Common Format and MIME Type for Comma-Separated Values (CSV) Files
Definition of tab-separated-values (tsv), Internet Assigned Numbers Authority
CSVをTSVへ変換するプログラム
前回はCSVデータをTSVデータに変換するプログラムを作成した。このプログラムは主に次の3つのファイルで構成されている。
util_file.cにファイル関連の処理を書いていき、util_csv.cにCSV関連の処理を書いていく、といった感じだ。main.cからこの2つの処理を呼び出して、CSVファイルをTSVデータに変換して標準出力で出力するといった内容にしてある。
それぞれの実装を見ていこう。まず、util_file.cは次のようになっている。
util_file.c
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
// Reads the file at `filepath` into a newly allocated, NUL-terminated buffer
// and returns it.
//
// The buffer is sized from the st_size reported by stat(). The file is opened
// in text mode ("r") and read one character at a time with fgetc(); reading
// stops after st_size characters, at end-of-file, or on a read error, so the
// result may be shorter than the reported size.
//
// Returns NULL when `filepath` is NULL, when stat() or fopen() fails, or when
// the buffer cannot be allocated. The caller owns the returned buffer and is
// responsible for releasing it with free().
char *file2str(const char *filepath) {
    struct stat st;
    size_t filesize;
    char *buf;
    FILE *fp;

    // Without a successful stat() the size is meaningless, so give up early.
    if (NULL == filepath || 0 != stat(filepath, &st) || st.st_size < 0) {
        return NULL;
    }
    filesize = (size_t)st.st_size;

    fp = fopen(filepath, "r");
    if (NULL == fp) {
        return NULL;
    }

    // One extra byte keeps the result NUL-terminated; calloc() zero-fills it.
    buf = calloc(filesize + 1, sizeof(char));
    if (NULL != buf) {
        char *p = buf;
        for (size_t i = 0; i < filesize; i++) {
            int c = fgetc(fp);
            if (EOF == c) {
                break;
            }
            *p = (char)c;
            ++p;
        }
    }

    // Close the stream on every path that opened it.
    fclose(fp);
    return buf;
}
この実装に関する説明は不要だと思う。file2str()では引数にパスを取っており、これをfopen(2)システムコールで開いてfgetc()で1文字づつ読み込んでいるだけだ。指定されたファイルの中身をchar型のデータとしてすべてメモリへコピーしている。
CSVデータをTSVデータへ変換する実装であるutil_csv.cは次のようになっている。
util_csv.c
#include <stdbool.h>
static char gettsvchar(const char);

// Converts CSV text to TSV text.
//
// Reads ibufsize bytes starting at ibuf and stores the converted text in
// obuf, never storing more than obufsize bytes. Field-separating commas
// become tabs, the double quotes surrounding a quoted field are dropped, a
// doubled quote ("") inside a quoted field becomes a single quote, and a tab
// inside a field is replaced by a space. A '\n' always ends the current
// record, even inside a quoted field.
//
// The output is not NUL-terminated by this function. Returns the number of
// bytes stored in obuf, or 0 when either buffer is NULL or either size is not
// positive.
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize) {
    // Nothing to read, or nowhere to write: no processing is done.
    if (!ibuf || !obuf || ibufsize <= 0 || obufsize <= 0)
        return 0;

    // Indicates the state during parsing.
    typedef enum FIELD_STATUS {
        FIELD_END,       // at the start of a record or just past a separator
        IN_FIELD,        // inside an unquoted field
        IN_QUOTED_FIELD  // inside a double-quoted field
    } record_status;

    const char *p_i = ibuf;
    const char *end_i = &ibuf[ibufsize - 1];
    char *p_o = obuf;
    int tsv_len = 0;
    record_status rs = FIELD_END;

    while (1) {
        const char c = *p_i;
        int emit = 1;  // whether this step stores a byte in obuf
        char out = c;  // the byte to store when emit is set

        if ('\n' == c) {
            // A newline ends the record regardless of the current state.
            rs = FIELD_END;
            out = '\n';
        } else {
            switch (rs) {
            case FIELD_END:
                if (',' == c) {
                    // Empty field: only the separator is written.
                    out = '\t';
                } else if ('"' == c) {
                    // The opening quote is not copied to the output.
                    rs = IN_QUOTED_FIELD;
                    emit = 0;
                } else {
                    rs = IN_FIELD;
                    out = gettsvchar(c);
                }
                break;
            case IN_FIELD:
                if (',' == c) {
                    rs = FIELD_END;
                    out = '\t';
                } else {
                    out = gettsvchar(c);
                }
                break;
            case IN_QUOTED_FIELD:
                if ('"' != c) {
                    out = gettsvchar(c);
                } else if (p_i == end_i) {
                    // The closing quote is the last input byte. The field
                    // ends with the input, so no separator follows it.
                    rs = FIELD_END;
                    emit = 0;
                } else if (',' == p_i[1]) {
                    // Closing quote followed by a separator: consume both.
                    rs = FIELD_END;
                    ++p_i;
                    out = '\t';
                } else if ('"' == p_i[1]) {
                    // Doubled quote: write one quote and skip the second.
                    out = '"';
                    ++p_i;
                } else {
                    // A quote followed by anything else (for example '\n')
                    // is dropped; the state is left unchanged.
                    emit = 0;
                }
                break;
            }
        }

        if (emit) {
            *p_o = out;
            ++p_o;
            ++tsv_len;
        }

        // Each step stores at most one byte, so this check keeps the output
        // within obufsize.
        if (p_i == end_i || tsv_len == obufsize)
            break;
        ++p_i;
    }
    return tsv_len;
}

// Maps a field character to its TSV form: a tab cannot appear inside a TSV
// field, so it is replaced by a space; every other character is unchanged.
static char gettsvchar(const char c) {
    return ('\t' == c) ? ' ' : c;
}
前回に説明したCSVとTSVの仕様に従って、CSVデータをTSVデータへ変換する処理をシンプルに書いていくとこんな感じになる。別段、高速化などを意識した実装はしていない。enum FIELD_STATUSという列挙型が、現在パースしているデータがフィールド内にあるか、クォートされたフィールド内にあるか、フィールドの終了か、を保持しているので、このあたりを意識しながら読んでいけば処理のロジックは見えてくると思う。
これら2つのファイルで実装されている2つの関数は、次のようにヘッダファイルに定義しておく。
main.h
#ifndef MAIN_H
#define MAIN_H

// Converts the ibufsize bytes of CSV text in ibuf to TSV text stored in obuf
// (capacity obufsize bytes) and returns the number of bytes stored.
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize);

// Returns a newly allocated, NUL-terminated buffer holding the contents of
// the file at filepath.
char *file2str(const char *filepath);

#endif
main.cは次のようにfile2str()とcsv2tsv()を呼び出して処理を行うだけだ。
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "main.h"
// Entry point: converts the CSV file named by argv[1] to TSV and writes the
// result to standard output.
//
// Returns 0 on success, and 1 when no file argument is given, the file cannot
// be read, or the output buffer cannot be allocated.
int main(int argc, char *argv[]) {
    char *csvdata, *tsvdata;
    int csvdata_bytes, tsvdata_bytes;

    // argv[1] is NULL when no argument is given; refuse instead of using it.
    if (argc < 2) {
        fprintf(stderr, "usage: csv2tsv <file.csv>\n");
        return 1;
    }

    csvdata = file2str(argv[1]);
    if (NULL == csvdata) {
        fprintf(stderr, "csv2tsv: cannot read %s\n", argv[1]);
        return 1;
    }
    csvdata_bytes = (int)strlen(csvdata);

    // The output buffer is sized to the input length, plus one zero byte
    // (from calloc) so the result can be printed as a C string.
    tsvdata_bytes = csvdata_bytes;
    tsvdata = calloc(tsvdata_bytes + 1, sizeof(char));
    if (NULL == tsvdata) {
        fprintf(stderr, "csv2tsv: out of memory\n");
        return 1;
    }

    csv2tsv(csvdata, csvdata_bytes, tsvdata, tsvdata_bytes);
    printf("%s", tsvdata);

    // Neither buffer is freed explicitly; both live until the process exits.
    return 0;
}
この実装では、file2str()の中で確保したメモリが開放されていない。実装の見通しの良さを考えると、file2str()はちょっとばかり改良の余地があるのだが、とりあえず動作させることを優先して、こうした実装にしてある。
●
Windows APIを使うのか、POSIXに寄せるのか
先に紹介した実装はPOSIXに寄せたコードになっている。詳しい説明はいずれ行うとして、ここではPOSIXはUNIX系のオペレーティングシステムでよく使われているAPI集のようなものと考えていただきたい。POSIXに寄せて書いておけば、Windowsで動いたプログラムがMacでもLinuxでもコンパイルして利用できるようにしやすい。Windowsだけで動けばよいということであれば、このようなソースコードではなく、Windows API (Win32 API)を使ってソースコードを書くほうがWindows的だ。
この辺りは、開発者が何を求めているかで採用するものが変わってくる。Windowsだけで動けばよく、将来的にMacやLinuxに移植することはまずないのなら、Windows APIなどのMicrosoftが提供しているAPIを使って実装するとよいだろう。Windowsのための機能だ。
一方、Windowsで開発するがLinuxでも実行できる必要がある、といった場合は、UNIX系オペレーティングシステムの多くが準拠または準拠に近い状態にあるPOSIXに習った実装をしておくとよい。先ほどのソースコードがそれに近い。C言語の教科書的な書籍や、C言語の演習などで最初に習うソースコードはPOSIXコードを使っていることが多いだろう。
組み込みになると話はまた変わり、組み込み用のSDKが提供している関数を使ったコーディングを行うことになる。結局のところ、プラットフォームや製品ごとに使うべき関数は異なるので、その場その場に合わせて関数を理解して使うしかない。
WindowsでPOSIX系の実装を行う場合、現状はWSL (Windows Subsystem for Linux)と呼ばれる機能を使うのが公式となるだろう。しかし、WindowsでPOSIX系の実装を行う方法はほかにもいくつかある。Windows APIやPOSIX系の実装に関してはいずれもっと詳しく説明する予定だ。まずは作ったものを動かしてみよう。
書き換えたMakefile
複数のソースコードをビルドする方法として、makeを紹介した。最もシンプルな書き方から取り上げて、徐々に実際に使われるような書き方に変えていったが、次に掲載するMakefileが今回のバージョンだ。
Makefile
# Builds csv2tsv.exe from every C source file in this directory (GNU make).
CMD= csv2tsv.exe
SRCS= $(wildcard *.c)
OBJS= $(SRCS:.c=.o)
CC= clang
CFLAGS+=-g

# build and clean name actions, not files.
.PHONY: build clean

build: $(CMD)

$(CMD): $(OBJS)
	$(CC) $(CFLAGS) -o $(CMD) $(OBJS)

# CFLAGS is passed at compile time too, so -g applies to each object file.
.c.o:
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f $(CMD)
	rm -f $(OBJS)
	rm -f *.ilk
	rm -f *.pdb
前回まではSRCSにCソースコードファイルの名前を直接書いていたが、今回はここを$(wildcard *.c)という記述に変えてある。GNU makeの機能を使って、.cという拡張子のファイルを自動的に指定するといった処理になっている。
こんな感じの書き方にしておくと、他のソフトウェアを開発する際に流用しやすい(当然すべてが流用できるわけではないし、限度もあるが)。今回と似たような構成であれば、CMDに指定するバイナリ名だけを書き換えれば使えるといった感じだ。実際にはもっと多くのオプションを編集することになるのだが、とっかかりとしてはこの程度でよいだろう。
ビルドして使ってみる
では、早速ビルドして使ってみよう。当たり前だが、次のようにCSVファイルをTSVデータへ変換できることを確認できるはずだ。
(これまでに構築した環境でビルドすると、実際にはコンパイル時にワーニングが出るのだが、見にくくなるので今回はそこは排除した。この点もいずれ取り上げる)
こんな感じで比較的簡単に単機能のコマンドは開発できる。Cで開発すると意識しなくても、高速に動作するプログラムが出来上がりやすい。慣れないとポインタや参照、メモリの扱いに難儀するとは思うが、使えるようになっておくとつぶしが効きやすいプログラミング言語なのだ。
○参考
RFC4180 - Common Format and MIME Type for Comma-Separated Values (CSV) Files
Definition of tab-separated-values (tsv), Internet Assigned Numbers Authority