1 | /* Adapted from osm2pgsql by Martijn van Oosterhout <kleptog@svana.org> 2008 */ |
---|
2 | /* Licence: GPL */ |
---|
3 | #define _GNU_SOURCE |
---|
4 | #define _LARGEFILE64_SOURCE |
---|
5 | |
---|
6 | #include <stdio.h> |
---|
7 | #include <unistd.h> |
---|
8 | #include <string.h> |
---|
9 | #include <sys/types.h> |
---|
10 | #include <sys/stat.h> |
---|
11 | #include <fcntl.h> |
---|
12 | #include <zlib.h> |
---|
13 | #include <bzlib.h> |
---|
14 | |
---|
15 | #include "input.h" |
---|
16 | |
---|
/* Per-source reader state shared by the functions in this file. */
struct Input {
    /* Copy of the name the source was opened with. */
    char *name;

    /* Which decoder the handle below belongs to. */
    enum {
        plainFile,
        gzipFile,
        bzip2File
    } type;

    /* Decoder-specific handle; its real type depends on `type`. */
    void *fileHandle;

    /*
     * Underlying stdio stream. Only the bzip2 path uses it (to restart
     * decompression when a file holds several streams); the other
     * decoders must ignore it.
     */
    FILE *systemHandle;

    /* Non-zero once the source has been exhausted or has failed. */
    int eof;

    /* Read-ahead buffer used for character-at-a-time access. */
    char buf[4096];

    /* Index of the next unread byte in buf. */
    int buf_ptr;

    /* Number of valid bytes currently held in buf. */
    int buf_fill;
};
---|
28 | |
---|
29 | // tries to re-open the bz stream at the next stream start. |
---|
30 | // returns 0 on success, -1 on failure. |
---|
31 | int bzReOpen(struct Input *ctx, int *error) { |
---|
32 | // for copying out the last unused part of the block which |
---|
33 | // has an EOS token in it. needed for re-initialising the |
---|
34 | // next stream. |
---|
35 | unsigned char unused[BZ_MAX_UNUSED]; |
---|
36 | void *unused_tmp_ptr = NULL; |
---|
37 | int nUnused, i; |
---|
38 | |
---|
39 | BZ2_bzReadGetUnused(error, (BZFILE *)(ctx->fileHandle), &unused_tmp_ptr, &nUnused); |
---|
40 | if (*error != BZ_OK) return -1; |
---|
41 | |
---|
42 | // when bzReadClose is called the unused buffer is deallocated, |
---|
43 | // so it needs to be copied somewhere safe first. |
---|
44 | for (i = 0; i < nUnused; ++i) |
---|
45 | unused[i] = ((unsigned char *)unused_tmp_ptr)[i]; |
---|
46 | |
---|
47 | BZ2_bzReadClose(error, (BZFILE *)(ctx->fileHandle)); |
---|
48 | if (*error != BZ_OK) return -1; |
---|
49 | |
---|
50 | // reassign the file handle |
---|
51 | ctx->fileHandle = BZ2_bzReadOpen(error, ctx->systemHandle, 0, 0, unused, nUnused); |
---|
52 | if (ctx->fileHandle == NULL || *error != BZ_OK) return -1; |
---|
53 | |
---|
54 | return 0; |
---|
55 | } |
---|
56 | |
---|
57 | int readFile(void *context, char * buffer, int len) |
---|
58 | { |
---|
59 | struct Input *ctx = context; |
---|
60 | void *f = ctx->fileHandle; |
---|
61 | int l = 0, error = 0; |
---|
62 | |
---|
63 | if (ctx->eof || (len == 0)) |
---|
64 | return 0; |
---|
65 | |
---|
66 | switch(ctx->type) { |
---|
67 | case plainFile: |
---|
68 | l = read(*(int *)f, buffer, len); |
---|
69 | if (l <= 0) ctx->eof = 1; |
---|
70 | break; |
---|
71 | case gzipFile: |
---|
72 | l = gzread((gzFile)f, buffer, len); |
---|
73 | if (l <= 0) ctx->eof = 1; |
---|
74 | break; |
---|
75 | case bzip2File: |
---|
76 | l = BZ2_bzRead(&error, (BZFILE *)f, buffer, len); |
---|
77 | |
---|
78 | // error codes BZ_OK and BZ_STREAM_END are both "OK", but the stream |
---|
79 | // end means the reader needs to be reset from the original handle. |
---|
80 | if (error != BZ_OK) { |
---|
81 | // for stream errors, try re-opening the stream before admitting defeat. |
---|
82 | if (error != BZ_STREAM_END || bzReOpen(ctx, &error) != 0) { |
---|
83 | l = 0; |
---|
84 | ctx->eof = 1; |
---|
85 | } |
---|
86 | } |
---|
87 | break; |
---|
88 | default: |
---|
89 | fprintf(stderr, "Bad file type\n"); |
---|
90 | break; |
---|
91 | } |
---|
92 | |
---|
93 | if (l < 0) { |
---|
94 | fprintf(stderr, "File reader received error %d (%d)\n", l, error); |
---|
95 | l = 0; |
---|
96 | } |
---|
97 | |
---|
98 | return l; |
---|
99 | } |
---|
100 | |
---|
101 | char inputGetChar(void *context) |
---|
102 | { |
---|
103 | struct Input *ctx = context; |
---|
104 | |
---|
105 | if (ctx->buf_ptr == ctx->buf_fill) { |
---|
106 | ctx->buf_fill = readFile(context, &ctx->buf[0], sizeof(ctx->buf)); |
---|
107 | ctx->buf_ptr = 0; |
---|
108 | if (ctx->buf_fill == 0) |
---|
109 | return 0; |
---|
110 | if (ctx->buf_fill < 0) { |
---|
111 | perror("Error while reading file"); |
---|
112 | exit(1); |
---|
113 | } |
---|
114 | } |
---|
115 | //readFile(context, &c, 1); |
---|
116 | return ctx->buf[ctx->buf_ptr++]; |
---|
117 | } |
---|
118 | |
---|
119 | int inputEof(void *context) |
---|
120 | { |
---|
121 | return ((struct Input *)context)->eof; |
---|
122 | } |
---|
123 | |
---|
124 | |
---|
125 | void *inputOpen(const char *name) |
---|
126 | { |
---|
127 | const char *ext = strrchr(name, '.'); |
---|
128 | struct Input *ctx = malloc (sizeof(*ctx)); |
---|
129 | |
---|
130 | if (!ctx) |
---|
131 | return NULL; |
---|
132 | |
---|
133 | memset(ctx, 0, sizeof(*ctx)); |
---|
134 | |
---|
135 | ctx->name = strdup(name); |
---|
136 | |
---|
137 | if (ext && !strcmp(ext, ".gz")) { |
---|
138 | ctx->fileHandle = (void *)gzopen(name, "rb"); |
---|
139 | ctx->type = gzipFile; |
---|
140 | } else if (ext && !strcmp(ext, ".bz2")) { |
---|
141 | int error = 0; |
---|
142 | ctx->systemHandle = fopen(name, "rb"); |
---|
143 | if (!ctx->systemHandle) { |
---|
144 | fprintf(stderr, "error while opening file %s\n", name); |
---|
145 | exit(10); |
---|
146 | } |
---|
147 | |
---|
148 | ctx->fileHandle = (void *)BZ2_bzReadOpen(&error, ctx->systemHandle, 0, 0, NULL, 0); |
---|
149 | ctx->type = bzip2File; |
---|
150 | |
---|
151 | } else { |
---|
152 | int *pfd = malloc(sizeof(pfd)); |
---|
153 | if (pfd) { |
---|
154 | if (!strcmp(name, "-")) { |
---|
155 | *pfd = STDIN_FILENO; |
---|
156 | } else { |
---|
157 | int flags = O_RDONLY; |
---|
158 | #ifdef O_LARGEFILE |
---|
159 | flags |= O_LARGEFILE; |
---|
160 | #endif |
---|
161 | *pfd = open(name, flags); |
---|
162 | if (*pfd < 0) { |
---|
163 | free(pfd); |
---|
164 | pfd = NULL; |
---|
165 | } |
---|
166 | } |
---|
167 | } |
---|
168 | ctx->fileHandle = (void *)pfd; |
---|
169 | ctx->type = plainFile; |
---|
170 | } |
---|
171 | if (!ctx->fileHandle) { |
---|
172 | fprintf(stderr, "error while opening file %s\n", name); |
---|
173 | exit(10); |
---|
174 | } |
---|
175 | ctx->buf_ptr = 0; |
---|
176 | ctx->buf_fill = 0; |
---|
177 | return (void *)ctx; |
---|
178 | } |
---|
179 | |
---|
180 | int inputClose(void *context) |
---|
181 | { |
---|
182 | struct Input *ctx = context; |
---|
183 | void *f = ctx->fileHandle; |
---|
184 | |
---|
185 | switch(ctx->type) { |
---|
186 | case plainFile: |
---|
187 | close(*(int *)f); |
---|
188 | free(f); |
---|
189 | break; |
---|
190 | case gzipFile: |
---|
191 | gzclose((gzFile)f); |
---|
192 | break; |
---|
193 | case bzip2File: |
---|
194 | BZ2_bzclose((BZFILE *)f); |
---|
195 | break; |
---|
196 | default: |
---|
197 | fprintf(stderr, "Bad file type\n"); |
---|
198 | break; |
---|
199 | } |
---|
200 | |
---|
201 | free(ctx->name); |
---|
202 | free(ctx); |
---|
203 | return 0; |
---|
204 | } |
---|
205 | |
---|
206 | xmlTextReaderPtr inputUTF8(const char *name) |
---|
207 | { |
---|
208 | void *ctx = inputOpen(name); |
---|
209 | |
---|
210 | if (!ctx) { |
---|
211 | fprintf(stderr, "Input reader create failed for: %s\n", name); |
---|
212 | return NULL; |
---|
213 | } |
---|
214 | |
---|
215 | return xmlReaderForIO(readFile, inputClose, (void *)ctx, NULL, NULL, 0); |
---|
216 | } |
---|