/* * postcodeine.c: * Given a postcode prefix, plot a map of all the postcodes matching that * prefix. * * Copyright (c) 2006 UK Citizens Online Democracy. All rights reserved. * Email: chris@mysociety.org; WWW: http://www.mysociety.org/ * */ static const char rcsid[] = "$Id: postcodeine.c,v 1.5 2006/01/19 19:03:23 chris Exp chris $"; #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "postcodeine.h" #define MIN_LON -11. #define MAX_LON 2. #define MIN_LAT 49.5 #define MAX_LAT 59.5 #define POSTCODE_CACHE "/home/chris/projects/postcodeine/data/postcodes.dat" #define POSTCODE_DIR "/home/chris/projects/mysociety/codepoint-2005-08/Codepoint_2005.3.0_Aug/sorted" #define IMAGE_CACHE_DIR "/home/chris/projects/postcodeine/images" #define die(...) do { fprintf(stderr, "postcodeine[%d]: ", (int)getpid()); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); exit(1); } while (0) struct image images[] = { { 301, 400, NULL }, { 902, 1200, NULL }, }; #define NIMAGES 2 struct postcodepos { char postcode[10]; double x, y; /* float'd do */ }; /* clear_image I * Clear I. */ static void clear_image(struct image *I) { memset((char*)I->data + I->h * sizeof(pixel*), 0, I->h * I->w * sizeof(pixel)); } /* grid_to_map IRISH E N X Y * Convert the grid position (E, N) to an (X, Y) coordinate on the map. If * IRISH is true, the Irish grid is used; otherwise OSGB. */ static void grid_to_map(const bool is_irish_grid, const int e, const int n, double *x, double *y) { static const char *osgb_params[] = { "+init=world:bng", NULL }, /* We have to put this in manually as there is an error * in the file distributed with proj; unfortunately * this does not count as an "important" bug and so * will not be fixed in Debian. */ *osni_params[] = { "+proj:tmerc", "+ellps=mod_airy", "+lat_0=53d30\'N", "+lon_0=8W", "+x_0=200000", "+y_0=250000", "k_0=1.000035", "+no_defs", NULL }, *merc_params[] = { "+proj=merc", "+ellps=WGS84", NULL }; static projPJ osgb, osni, merc; projUV c1 = {0}, c2 = {0}; static double E0, N0, E1, N1; if (!osgb) { /* XXX these seem to abort on failure rather than returning an error */ if (!(osgb = pj_init(1, (char**)osgb_params))) die("%s: pj_init: %s", osgb_params[0], pj_strerrno(pj_errno)); if (!(osni = pj_init(8, (char**)osni_params))) die("%s: pj_init: %s", osni_params[0], pj_strerrno(pj_errno)); if (!(merc = pj_init(2, (char**)merc_params))) die("%s: pj_init: %s %s", merc_params[0], merc_params[1], pj_strerrno(pj_errno)); /* Figure out bounds of the map. */ c1.u = MIN_LON * DEG_TO_RAD; c1.v = MAX_LAT * DEG_TO_RAD; c2 = pj_fwd(c1, merc); E0 = c2.u; N0 = c2.v; c1.u = MAX_LON * DEG_TO_RAD; c1.v = MIN_LAT * DEG_TO_RAD; c2 = pj_fwd(c1, merc); E1 = c2.u; N1 = c2.v; } /* grid to lat/lon. */ c1.u = e; c1.v = n; c2 = pj_inv(c1, is_irish_grid ? osni : osgb); /* XXX at this point we have a lat/lon registered to the Airy or modified * Airy datum. We should transform it to the WGS84 datum using a Helmert * transform or whatever, but for this scale it's not really worth the * effort. */ /* lat/lon to Mercator eastings/northings. */ c1 = pj_fwd(c2, merc); *x = (c1.u - E0) / (E1 - E0); *y = (c1.v - N0) / (N1 - N0); } struct pcindex { int first, last; }; static int tobase36(const char c) { if (tolower(c) >= 'a' && tolower(c) <= 'z') return tolower(c) - 'a'; else if (c >= '0' && c <= '9') return 26 + c - '0'; else die("bad character '%c'", c); } static bool fread_all(void *buf, const size_t n, FILE *fp) { return fread(buf, 1, n, fp) == n; } static bool fwrite_all(const void *buf, const size_t n, FILE *fp) { return fwrite(buf, 1, n, fp) == n; } static struct postcodepos *read_postcodes_cached(const char *dir, size_t *npcs, struct pcindex i1[36], struct pcindex i2[36][36], struct pcindex i3[36][36][36]) { FILE *fp; struct postcodepos *pp; int a, b, c; struct pcindex ind; if (!(fp = fopen(POSTCODE_CACHE, "r"))) return NULL; fprintf(stderr, "postcodeine[%d]: reading cached postcode data\n", (int)getpid()); if (!fread_all(npcs, sizeof *npcs, fp)) die("%s: read: %s", POSTCODE_CACHE, strerror(errno)); pp = malloc(*npcs * sizeof *pp); if (!fread_all(pp, *npcs * sizeof *pp, fp)) die("%s: read: %s", POSTCODE_CACHE, strerror(errno)); for (a = 0; a < 36; ++a) { if (!fread_all(&ind, sizeof(struct pcindex), fp)) die("%s: read: %s", POSTCODE_CACHE, strerror(errno)); i1[a] = ind; for (b = 0; b < 36; ++b) { if (!fread_all(&ind, sizeof(struct pcindex), fp)) die("%s: read: %s", POSTCODE_CACHE, strerror(errno)); i2[a][b] = ind; for (c = 0; c < 36; ++c) { if (!fread_all(&ind, sizeof(struct pcindex), fp)) die("%s: read: %s", POSTCODE_CACHE, strerror(errno)); i3[a][b][c] = ind; } } } fclose(fp); return pp; } /* read_postcodes DIRECTORY * Read CSV files containing postcodes and their positions from DIRECTORY. */ static struct postcodepos *read_postcodes(const char *dir, size_t *npcs, struct pcindex i1[36], struct pcindex i2[36][36], struct pcindex i3[36][36][36]) { DIR *D; struct postcodepos *pp; size_t n = 0, nalloc; struct dirent *e; char fn[256]; FILE *fp; int a, b, c; struct pcindex ind; if ((pp = read_postcodes_cached(dir, npcs, i1, i2, i3))) return pp; if (!(D = opendir(dir))) die("%s: opendir: %s", dir, strerror(errno)); pp = malloc((nalloc = 100000) * sizeof *pp); while ((e = readdir(D))) { char *q; char line[256]; int linenum; /* Check that the filename ends ".csv" or ".CSV". */ if ((!(q = strstr(e->d_name, ".csv")) && !(q = strstr(e->d_name, ".CSV"))) || q[4]) continue; sprintf(fn, "%s/%s", dir, e->d_name); if (!(fp = fopen(fn, "r"))) die("%s: open: %s", fn, strerror(errno)); linenum = 1; while (fgets(line, sizeof line, fp)) { char *pc, *p; int i, j, E, N; /* * Example line: * "AB119NB",10,"N",18,18,18,0,0,3,0,395004,805097,179,"S00","SN9","00","QA","39","S" */ pc = line + 1; p = strchr(pc, '\"'); if (!p) die("%s:%d: bad data (no quoted postcode)", fn, linenum); *(p++) = 0; for (i = 0; i < 10; ++i) { p = strchr(p, ','); if (!p) die("%s:%d: bad data (not enough fields)", fn, linenum); ++p; } if (2 != sscanf(p, "%d,%d,", &E, &N)) die("%s:%d: bad data (no coordinates)", fn, linenum); /* Some postcodes have no valid coordinate data. */ if (!E && !N) continue; if (n == nalloc) pp = realloc(pp, (nalloc += 1000000) * sizeof *pp); for (i = 0, j = 0; pc[i]; ++i) if (pc[i] != ' ') pp[n].postcode[j++] = pc[i]; pp[n].postcode[j] = 0; if (pp[n].postcode[strspn(pp[n].postcode, "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")]) die("%s:%d: postcode \"%s\" contains invalid characters", fn, linenum, pp[n].postcode); grid_to_map(pp[n].postcode[0] == 'B' && pp[n].postcode[1] == 'T', E, N, &pp[n].x, &pp[n].y); if (n > 0 && strcmp(pp[n - 1].postcode, pp[n].postcode) >= 0) die("%s:%d: postcode \"%s\" does not follow previous postcode \"%s\" lexically", fn, linenum, pp[n].postcode, pp[n - 1].postcode); a = tobase36(pp[n].postcode[0]); b = tobase36(pp[n].postcode[1]); c = tobase36(pp[n].postcode[2]); if (i1[a].first == -1) { i1[a].first = (int)n; } i1[a].last = (int)n; if (i2[a][b].first == -1) i2[a][b].first = (int)n; i2[a][b].last = (int)n; if (i3[a][b][c].first == -1) i3[a][b][c].first = (int)n; i3[a][b][c].last = (int)n; ++linenum; ++n; } if (ferror(fp)) die("%s: %s", fn, strerror(errno)); fclose(fp); } fprintf(stderr, "postcodeine[%d]: writing cached postcode data\n", (int)getpid()); sprintf(fn, "%s.%d", POSTCODE_CACHE, (int)getpid()); if (!(fp = fopen(fn, "w"))) die("%s: open: %s", fn, strerror(errno)); *npcs = n; if (!fwrite_all(npcs, sizeof *npcs, fp) || !fwrite_all(pp, n * sizeof *pp, fp)) die("%s: write: %s", fn, strerror(errno)); for (a = 0; a < 36; ++a) { ind = i1[a]; if (!fwrite_all(&ind, sizeof(struct pcindex), fp)) die("%s: write: %s", fn, strerror(errno)); for (b = 0; b < 36; ++b) { ind = i2[a][b]; if (!fwrite_all(&ind, sizeof(struct pcindex), fp)) die("%s: write: %s", fn, strerror(errno)); for (c = 0; c < 36; ++c) { ind = i3[a][b][c]; if (!fwrite_all(&ind, sizeof(struct pcindex), fp)) die("%s: write: %s", fn, strerror(errno)); } } } fclose(fp); if (-1 == rename(fn, POSTCODE_CACHE)) { unlink(fn); die("%s: rename: %s", fn, strerror(errno)); } return pp; } static void error(const int code, const char *str) { printf( "Status: %d %s\r\n" "Content-Type: text/plain\r\n" "Content-Length: %d\r\n" "\r\n" "%s\n", code, str, strlen(str) + 1, str); } int main(int argc, char *argv[]) { int i, j, k; struct postcodepos *pcs; size_t npcs; struct pcindex idx1[36], idx2[36][36], idx3[36][36][36]; extern bool writepng(const char *filename, const struct image *img); time_t e; struct tm *E; char expirydate[32]; time(&e); e += 365 * 86400; E = gmtime(&e); strftime(expirydate, sizeof expirydate, "%a, %d %b %Y %H:%M:%S GMT", E); /* Must do this here so that error-handling works. */ if (FCGI_Accept() < 0) die("first FCGI_Accept() returned < 0"); fprintf(stderr, "postcodeine[%d]: starting up\n", (int)getpid()); /* Allocate space for the images. */ for (i = 0; i < NIMAGES; ++i) { int y; images[i].data = malloc(images[i].h * sizeof(pixel*) + images[i].h * images[i].w * sizeof(pixel)); for (y = 0; y < images[i].h; ++y) images[i].data[y] = (pixel*)((char*)images[i].data + images[i].h * sizeof(pixel*) + y * images[i].w * sizeof(pixel)); } /* Initialise the three indices. */ for (i = 0; i < 36; ++i) { idx1[i].first = idx1[i].last = -1; for (j = 0; j < 36; ++j) { idx2[i][j].first = idx2[i][j].last = -1; for (k = 0; k < 36; ++k) idx3[i][j][k].first = idx3[i][j][k].last = -1; } } pcs = read_postcodes(POSTCODE_DIR, &npcs, idx1, idx2, idx3); do { char *query; int n0, n1; struct image *I; char fn1[256], fn2[256]; int c; FILE *fp; struct stat st; float X = 0, Y = 0; if (getenv("HTTP_IF_MODIFIED_SINCE")) { printf( "Status: 304 Not Modified\r\n" "Content-Type: image/png\r\n" "\r\n"); continue; } /* First character is "B" for big map or "S" for small map; remainder * is postcode prefix. */ query = getenv("QUERY_STRING"); if (!query || !*query || !strchr("BSXZ", *query)) { error(400, "Bad query"); continue; } if (*query == 'Z') { /* List of postcode zones matching. */ struct postcodezone *Z; size_t ql; char zone[3] = {0}; extern struct postcodezone zones[]; /* in zones.c */ printf( "Content-Type: text/html\r\n" /* sort-of */ "Expires: %s\r\n" "\r\n" "" "", expirydate); ql = 0; if (query[1] && isalpha(query[1])) { zone[0] = query[1]; ql = 1; if (query[2]) { zone[1] = isalpha(query[2]) ? query[2] : 0; ql = 2; } } for (Z = zones; Z->zone; ++Z) { if (!query[1] || memcmp(Z->zone, zone, ql) == 0) printf("", (int)strlen(zone), Z->zone, Z->zone + strlen(zone), Z->name); } printf("
%.*s%s%s
"); continue; } sprintf(fn2, "%s/%s.png", IMAGE_CACHE_DIR, query); sprintf(fn1, "%s.%d", fn2, (int)getpid()); if (*query != 'X' && (fp = fopen(fn2, "r"))) goto showimage; if (*query == 'B') I = images + 1; else if (*query == 'S') I = images; else I = NULL; ++query; if (query[strspn(query, "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789")]) n0 = n1 = -1; else if (!*query) { n0 = 0; n1 = npcs; } else { int a = -1, b = -1, c = -1; a = tobase36(query[0]); if (query[1]) { b = tobase36(query[1]); if (query[2]) c = tobase36(query[2]); } if (c != -1) { size_t l; n0 = idx3[a][b][c].first; n1 = idx3[a][b][c].last + 1; l = strlen(query); /* test on I so that the "mean coordinates" mode isn't an * oracle for postcode-to-place lookups. */ if (l > 3 && I) { /* XXX should bisect */ while (strncmp(pcs[n0].postcode, query, l) && n0 < n1) ++n0; if (n0 == n1) n0 = n1 = -1; else { while (strncmp(pcs[n1].postcode, query, l) && n1 > n0) --n1; ++n1; } if (n0 == n1) n0 = n1 = -1; } } else if (b != -1) { n0 = idx2[a][b].first; n1 = idx2[a][b].last + 1; } else { n0 = idx1[a].first; n1 = idx1[a].last + 1; } } fprintf(stderr, "query = \"%s\"; from = %d; to = %d\n", query, n0, n1); if (I) clear_image(I); if (n0 != -1) { /* Draw a little square on each bit of the image. */ int n, d = 1, N; if (n1 - n0 > 100000) d = 10; else if (n1 - n0 > 10000) d = 2; for (n = n0, N = 0; n < n1; n += d) { int x, y, i, j; ++N; if (!I) { X += pcs[n].x; Y += pcs[n].y; continue; } x = pcs[n].x * I->w; y = pcs[n].y * I->h; for (j = y - 1; j <= y + 1; ++j) { if (j < 0 || j >= I->h) continue; for (i = x - 1; i <= x + 1; ++i) { if (i < 0 || i >= I->w) continue; I->data[j][i] = 1; } } } X /= N; Y /= N; } if (!I) { char buf[32]; sprintf(buf, "%.5f,%.5f", X, Y); printf( "Content-Type: text/plain\r\n" "Content-Length: %d\r\n" "Expires: %s\r\n" "\r\n" "%s", strlen(buf), expirydate, buf); continue; } /* Write a PNG file. */ --query; writepng(fn1, I); rename(fn1, fn2); if (!(fp = fopen(fn2, "r"))) { error(500, strerror(errno)); continue; } showimage: if (-1 == fstat(fileno(fp), &st)) { error(500, strerror(errno)); fclose(fp); continue; } printf( "Content-Type: image/png\r\n" "Content-Length: %u\r\n" "\r\n", (unsigned)st.st_size); while (EOF != (c = getc(fp))) putc(c, stdout); fclose(fp); } while (FCGI_Accept() >= 0); fprintf(stderr, "postcodeine[%d]: shutting down\n", (int)getpid()); return 0; }