mirror of
https://github.com/git/git.git
synced 2024-11-07 01:32:58 +01:00
2744b2344d
I looked a bit at my old BK tools for the same thing, but they were just so horrid in many ways that I largely rewrote it all and these tools do things a bit differently. Instead of aggressively piping data from one process to another (which was clever but very hard to follow), this first just splits out the mbox into many smaller email files, and then does some scripts on these temporary files.
144 lines
2.5 KiB
C
144 lines
2.5 KiB
C
/*
|
|
* Totally braindamaged mbox splitter program.
|
|
*
|
|
* It just splits a mbox into a list of files: "0001" "0002" ..
|
|
* so you can process them further from there.
|
|
*/
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <fcntl.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/mman.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <assert.h>
|
|
|
|
/*
 * Print the command-line synopsis on stderr and terminate the process
 * with exit status 1.  Declared as returning int, but control never
 * comes back to the caller.
 */
static int usage(void)
{
	fputs("mailsplit <mbox> <directory>\n", stderr);
	exit(1);
}
|
|
|
|
/*
 * Return the length of the line that starts at `map`, counting the
 * terminating '\n' when there is one.
 *
 * At most `size` bytes are examined.  If no newline occurs within them
 * the result is `size` (a final, unterminated line).  An empty buffer
 * yields 0 and is never dereferenced.
 */
static int linelen(const char *map, unsigned long size)
{
	const char *newline;

	if (!size)
		return 0;

	newline = memchr(map, '\n', size);
	if (!newline)
		return (int)size;
	return (int)(newline - map) + 1;
}
|
|
|
|
/*
 * Heuristically decide whether the `len` bytes at `line` look like an
 * mbox "From " separator line.
 *
 * To be accepted the line must:
 *   - be at least 20 bytes long and start with "From ";
 *   - contain a ':' (the last one is used) with digits at offsets
 *     -4, -2, -1, +1 and +2 around it, as in the "h:mm:ss" tail of a
 *     clock time;
 *   - carry a number greater than 90 starting three bytes after that
 *     ':', which is taken to be the year.
 *
 * Returns 1 if the line qualifies, 0 otherwise.  Only bytes inside
 * [line, line + len) are ever read.
 */
static int is_from_line(const char *line, int len)
{
	const char *end, *colon, *p;
	long year = 0;
	int negative = 0;

	if (len < 20 || memcmp("From ", line, 5))
		return 0;

	end = line + len;
	colon = end - 2;
	line += 5;
	/* Walk backwards to the last ':' located after the "From " prefix. */
	for (;;) {
		if (colon < line)
			return 0;
		if (*--colon == ':')
			break;
	}

	/* <ctype.h> functions need an unsigned char value, not a plain char. */
	if (!isdigit((unsigned char)colon[-4]) ||
	    !isdigit((unsigned char)colon[-2]) ||
	    !isdigit((unsigned char)colon[-1]) ||
	    !isdigit((unsigned char)colon[ 1]) ||
	    !isdigit((unsigned char)colon[ 2]))
		return 0;

	/*
	 * year
	 *
	 * The buffer is not NUL-terminated, so strtol() could skip the
	 * trailing newline and pick up digits from the following line, or
	 * run off the end of the buffer.  Parse by hand instead, following
	 * strtol's rules (leading whitespace, optional sign, decimal
	 * digits) but never looking past the end of this line.
	 */
	p = colon + 3;
	while (p < end && isspace((unsigned char)*p))
		p++;
	if (p < end && (*p == '+' || *p == '-')) {
		negative = (*p == '-');
		p++;
	}
	while (p < end && isdigit((unsigned char)*p)) {
		/* Only "> 90" matters, so stop accumulating once past it. */
		if (year <= 90)
			year = year * 10 + (*p - '0');
		p++;
	}
	if (negative || year <= 90)
		return 0;

	/* Ok, close enough */
	return 1;
}
|
|
|
|
/*
 * Find where the first message of an mbox buffer ends.
 *
 * `map` points at `size` bytes that must begin with "From ".  If they
 * do not, or if `size` is below 6, "corrupt mailbox" is printed on
 * stderr and the process exits with status 1.
 *
 * Returns the byte offset of the first later line accepted by
 * is_from_line(), i.e. the length of the first message, or the total
 * `size` when no such line exists.
 *
 * NOTE: the offset is tracked as unsigned long but narrowed to int by
 * the return type.
 */
static int parse_email(const void *map, unsigned long size)
{
	/* Arithmetic on void pointers is not ISO C; walk a char pointer. */
	const char *cur = map;
	unsigned long offset;

	if (size < 6 || memcmp("From ", cur, 5))
		goto corrupt;

	/*
	 * Make sure we don't trigger on this first line: step over one
	 * byte so the scan below starts at "rom ...".
	 */
	cur++;
	size--;
	offset = 1;

	/*
	 * Search for a line beginning with "From ", and
	 * having something that looks like a date format.
	 */
	do {
		int len = linelen(cur, size);

		if (is_from_line(cur, len))
			return offset;
		cur += len;
		size -= len;
		offset += len;
	} while (size);
	return offset;

corrupt:
	fprintf(stderr, "corrupt mailbox\n");
	exit(1);
}
|
|
|
|
/*
 * mailsplit <mbox> <directory>
 *
 * Map the mbox file read-only, change into <directory>, and write each
 * message found by parse_email() to its own newly created file named
 * "0001", "0002", ...  Output files are opened with O_EXCL, so an
 * existing file is never overwritten.  Any failure prints a diagnostic
 * on stderr and exits with status 1; success returns 0.
 */
int main(int argc, char **argv)
{
	int fd, nr;
	struct stat st;
	unsigned long size;
	void *map;
	const char *cursor;

	if (argc != 3)
		usage();

	/* Open the mbox before chdir() so a relative path still resolves. */
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror(argv[1]);
		exit(1);
	}
	if (chdir(argv[2]) < 0)
		usage();
	if (fstat(fd, &st) < 0) {
		perror("stat");
		exit(1);
	}
	size = st.st_size;
	map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);

	/* Arithmetic on void pointers is not ISO C; advance a char pointer. */
	cursor = map;
	nr = 0;
	do {
		/* Large enough for any int formatted with "%04d" plus the NUL. */
		char name[16];
		unsigned long len = parse_email(cursor, size);
		unsigned long left = len;
		const char *out = cursor;

		assert(len <= size);
		snprintf(name, sizeof(name), "%04d", ++nr);
		fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
		if (fd < 0) {
			perror(name);
			exit(1);
		}

		/* write() may accept fewer bytes than requested; keep going. */
		while (left > 0) {
			ssize_t written = write(fd, out, left);

			if (written < 0) {
				perror("write");
				exit(1);
			}
			if (written == 0) {
				/* No progress and no errno to report. */
				fprintf(stderr, "write: wrote 0 bytes to %s\n", name);
				exit(1);
			}
			out += written;
			left -= (unsigned long)written;
		}

		/* Deferred write errors can surface at close time. */
		if (close(fd) < 0) {
			perror(name);
			exit(1);
		}
		cursor += len;
		size -= len;
	} while (size > 0);
	return 0;
}
|