123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684 |
- From: Joe Orton <jorton@redhat.com>
- Date: Thu, 20 Oct 2016 11:44:14 +0200
- Subject: Add support for use of the system timezone database
- Add support for use of the system timezone database, rather
- than embedding a copy. Discussed upstream but was not desired.
- History:
- r14: improve check for valid tz file
- r13: adapt for upstream changes to use PHP allocator
- r12: adapt for upstream changes for new zic
- r11: use canonical names to avoid more case sensitivity issues
- round lat/long from zone.tab towards zero per builtin db
- r10: make timezone case insensitive
- r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
- r8: fix compile error without --with-system-tzdata configured
- r7: improve check for valid timezone id to exclude directories
- r6: fix fd leak in r5, fix country code/BC flag use in
- timezone_identifiers_list() using system db,
- fix use of PECL timezonedb to override system db,
- r5: reverts addition of "System/Localtime" fake tzname.
- updated for 5.3.0, parses zone.tab to pick up mapping between
- timezone name, country code and long/lat coords
- r4: added "System/Localtime" tzname which uses /etc/localtime
- r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
- r2: add filesystem trawl to set up name alias index
- r1: initial revision
- ---
- ext/date/lib/parse_tz.c | 560 +++++++++++++++++++++++++++++++++++++++++++++++-
- ext/date/lib/timelib.m4 | 13 ++
- 2 files changed, 562 insertions(+), 11 deletions(-)
- diff --git a/ext/date/lib/parse_tz.c b/ext/date/lib/parse_tz.c
- index 20d7eea..ed7717e 100644
- --- a/ext/date/lib/parse_tz.c
- +++ b/ext/date/lib/parse_tz.c
- @@ -24,6 +24,16 @@
-
- #include "timelib.h"
-
- +#ifdef HAVE_SYSTEM_TZDATA
- +#include <sys/mman.h>
- +#include <sys/stat.h>
- +#include <limits.h>
- +#include <fcntl.h>
- +#include <unistd.h>
- +
- +#include "php_scandir.h"
- +#endif
- +
- #include <stdio.h>
-
- #ifdef HAVE_LOCALE_H
- @@ -36,8 +46,12 @@
- #include <strings.h>
- #endif
-
- +#ifndef HAVE_SYSTEM_TZDATA
- #define TIMELIB_SUPPORTS_V2DATA
- #include "timezonedb.h"
- +#endif
- +
- +#include <ctype.h>
-
- #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
- # if defined(__LITTLE_ENDIAN__)
- @@ -59,6 +73,11 @@ static int read_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
- {
- uint32_t version;
-
- + if (memcmp(*tzf, "TZif", 4) == 0) {
- + *tzf += 20;
- + return 0;
- + }
- +
- /* read ID */
- version = (*tzf)[3] - '0';
- *tzf += 4;
- @@ -302,7 +321,429 @@ void timelib_dump_tzinfo(timelib_tzinfo *tz)
- }
- }
-
- -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
- +#ifdef HAVE_SYSTEM_TZDATA
- +
- +#ifdef HAVE_SYSTEM_TZDATA_PREFIX
- +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
- +#else
- +#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
- +#endif
- +
- +/* System timezone database pointer. */
- +static const timelib_tzdb *timezonedb_system;
- +
- +/* Hash table entry caching one zone.tab row; entries that share a bucket are chained through 'next'. */
- +struct location_info {
- + char code[2]; /* two-letter country code; exactly 2 bytes, not NUL-terminated */
- + double latitude, longitude; /* decimal degrees, as produced by parse_iso6709() */
- + char name[64]; /* timezone name taken from the zone.tab row */
- + char *comment; /* strdup()ed copy of the row's comment column */
- + struct location_info *next; /* next entry in the same bucket, or NULL */
- +};
- +
- +/* Cache of zone.tab. */
- +static struct location_info **system_location_table;
- +
- +/* Size of the zone.tab hash table; a random-ish prime big enough to
- + * prevent too many collisions. */
- +#define LOCINFO_HASH_SIZE (1021)
- +
- +/* Case-insensitive hash of str, reduced to a bucket index below LOCINFO_HASH_SIZE. */
- +static uint32_t tz_hash(const char *str)
- +{
- +	const unsigned char *cursor = (const unsigned char *)str;
- +	uint32_t acc = 5381;
- +	int lowered;
- +
- +	for (lowered = tolower(*cursor); lowered != '\0'; lowered = tolower(*++cursor)) {
- +		acc = (acc << 5) ^ acc ^ lowered;
- +	}
- +
- +	return acc % LOCINFO_HASH_SIZE;
- +}
- +
- +/* Parse one ISO-6709 coordinate, [+-][D]DDMM[SS], as used in zone.tab.
- + * On success stores the value in decimal degrees in *result and returns
- + * a pointer just past the digits; returns NULL on a parse error. */
- +static char *parse_iso6709(char *p, double *result)
- +{
- +	char *digits, *scan;
- +	double degrees, direction;
- +	size_t ndigits;
- +	int wide;
- +
- +	/* A leading sign is mandatory. */
- +	switch (*p) {
- +	case '+': direction = 1.0; break;
- +	case '-': direction = -1.0; break;
- +	default: return NULL;
- +	}
- +
- +	digits = p + 1;
- +	scan = digits;
- +	while (*scan >= '0' && *scan <= '9') {
- +		scan++;
- +	}
- +	ndigits = scan - digits;
- +
- +	/* zone.tab writes no decimal point, so the digit count selects
- +	 * the layout:
- +	 *
- +	 * 4 = DDMM, 5 = DDDMM, 6 = DDMMSS, 7 = DDDMMSS
- +	 */
- +	if (ndigits < 4 || ndigits > 7) {
- +		return NULL;
- +	}
- +	wide = (ndigits == 5 || ndigits == 7);
- +
- +	/* Degrees: two digits, three in the odd-length layouts. */
- +	degrees = (digits[0] - '0') * 10.0 + (digits[1] - '0');
- +	digits += 2;
- +	if (wide) {
- +		degrees = degrees * 10.0 + (*digits++ - '0');
- +	}
- +
- +	/* Minutes are always present. */
- +	degrees += (10.0 * (digits[0] - '0') + digits[1] - '0') / 60.0;
- +	digits += 2;
- +
- +	/* Seconds exist only in the six and seven digit layouts. */
- +	if (ndigits > 5) {
- +		degrees += (10.0 * (digits[0] - '0') + digits[1] - '0') / 3600.0;
- +		digits += 2;
- +	}
- +
- +	/* Truncate towards zero at five decimals, matching the builtin data. */
- +	*result = trunc(degrees * direction * 100000.0) / 100000.0;
- +
- +	return digits;
- +}
- +
- +/* Parse zone.tab into a hash table, indexed by tz_hash(timezone-name),
- + * mapping each timezone to its country code and geographic location.
- + * Returns NULL if zone.tab cannot be opened or the table cannot be
- + * allocated; rows that are malformed or whose name does not fit are
- + * skipped, and loading stops early if memory runs out. */
- +static struct location_info **create_location_table(void)
- +{
- +	struct location_info **li, *i;
- +	char line[512];
- +	FILE *fp;
- +
- +	fp = fopen(ZONEINFO_PREFIX "/zone.tab", "r");
- +	if (!fp) {
- +		return NULL;
- +	}
- +
- +	li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
- +	if (!li) {
- +		fclose(fp);
- +		return NULL;
- +	}
- +
- +	while (fgets(line, sizeof line, fp)) {
- +		char *p = line, *code, *name, *comment;
- +		uint32_t hash;
- +		double latitude, longitude;
- +
- +		while (isspace((unsigned char)*p)) /* ctype needs an unsigned char value */
- +			p++;
- +
- +		if (*p == '#' || *p == '\0' || *p == '\n')
- +			continue;
- +
- +		if (!isalpha((unsigned char)p[0]) || !isalpha((unsigned char)p[1]) || p[2] != '\t')
- +			continue;
- +
- +		/* code => AA */
- +		code = p;
- +		p[2] = 0;
- +		p += 3;
- +
- +		/* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
- +		p = parse_iso6709(p, &latitude);
- +		if (!p) {
- +			continue;
- +		}
- +		p = parse_iso6709(p, &longitude);
- +		if (!p || *p != '\t') {
- +			continue;
- +		}
- +
- +		/* name = string */
- +		name = ++p;
- +		while (*p != '\t' && *p && *p != '\n')
- +			p++;
- +
- +		/* Step over the separator, but never past the end of the line. */
- +		if (*p != '\0')
- +			*p++ = '\0';
- +
- +		/* comment = string */
- +		comment = p;
- +		while (*p != '\t' && *p && *p != '\n')
- +			p++;
- +		*p = '\0';
- +
- +		if (strlen(name) >= sizeof i->name)
- +			continue;
- +		i = malloc(sizeof *i);
- +		if (!i || !(i->comment = strdup(comment))) {
- +			free(i);
- +			break;
- +		}
- +		hash = tz_hash(name);
- +		memcpy(i->code, code, 2);
- +		snprintf(i->name, sizeof i->name, "%s", name);
- +		i->longitude = longitude;
- +		i->latitude = latitude;
- +		i->next = li[hash];
- +		li[hash] = i;
- +	}
- +
- +	fclose(fp);
- +
- +	return li;
- +}
- +
- +/* Look 'name' up, ignoring case, in the zone.tab hash table 'li'.  Returns
- + * the matching entry, or NULL if 'li' is NULL or no entry matches. */
- +const struct location_info *find_zone_info(struct location_info **li,
- +	const char *name)
- +{
- +	const uint32_t bucket = tz_hash(name);
- +	const struct location_info *entry;
- +
- +	if (li == NULL) {
- +		return NULL;
- +	}
- +
- +	entry = li[bucket];
- +	while (entry != NULL && strcasecmp(entry->name, name) != 0) {
- +		entry = entry->next;
- +	}
- +
- +	return entry;
- +}
- +
- +/* Directory-scan filter: returns 1 to keep an entry and 0 to drop it.  Drops ".",
- + * "..", "posix", "posixrules", "right" and names containing ".list" or ".tab". */
- +static int index_filter(const struct dirent *ent)
- +{
- +	const char *leaf = ent->d_name;
- +	int excluded = strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0
- +		|| strcmp(leaf, "posix") == 0 || strcmp(leaf, "posixrules") == 0
- +		|| strcmp(leaf, "right") == 0
- +		|| strstr(leaf, ".list") != NULL || strstr(leaf, ".tab") != NULL;
- +
- +	return !excluded;
- +}
- +
- +/* Comparator for qsort()/bsearch(): orders index entries by id, ignoring case. */
- +static int sysdbcmp(const void *first, const void *second)
- +{
- +	const timelib_tzdb_index_entry *lhs = first, *rhs = second;
- +
- +	return strcasecmp(lhs->id, rhs->id);
- +}
- +
- +/* Build db->index by walking the tree under ZONEINFO_PREFIX: each non-directory entry accepted by index_filter() is stored under its prefix-relative path, then the array is sorted with sysdbcmp(). */
- +static void create_zone_index(timelib_tzdb *db)
- +{
- + size_t dirstack_size, dirstack_top;
- + size_t index_size, index_next;
- + timelib_tzdb_index_entry *db_index;
- + char **dirstack;
- +
- + /* LIFO stack to hold directory entries to scan; each slot is a
- + * strdup()ed directory name relative to the zoneinfo prefix. */
- + dirstack_size = 32;
- + dirstack = malloc(dirstack_size * sizeof *dirstack); /* NOTE(review): allocation results in this function are not checked */
- + dirstack_top = 1;
- + dirstack[0] = strdup(""); /* the empty name stands for the prefix directory itself */
- +
- + /* Index array; grown by doubling when full. */
- + index_size = 64;
- + db_index = malloc(index_size * sizeof *db_index);
- + index_next = 0;
- +
- + do {
- + struct dirent **ents;
- + char name[PATH_MAX], *top;
- + int count;
- +
- + /* Pop the top stack entry, and iterate through its contents. */
- + top = dirstack[--dirstack_top];
- + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
- +
- + count = php_scandir(name, &ents, index_filter, php_alphasort); /* presumably -1 on failure, as the check after the loop implies */
- +
- + while (count > 0) {
- + struct stat st;
- + const char *leaf = ents[count - 1]->d_name; /* entries are consumed from the end of the array */
- +
- + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
- + top, leaf);
- +
- + if (strlen(name) && stat(name, &st) == 0) { /* entries that cannot be stat()ed are ignored */
- + /* Name, relative to the zoneinfo prefix; 'name' is reused for it. */
- + const char *root = top;
- +
- + if (root[0] == '/') root++;
- +
- + snprintf(name, sizeof name, "%s%s%s", root,
- + *root ? "/": "", leaf);
- +
- + if (S_ISDIR(st.st_mode)) { /* directory: push it for a later scan */
- + if (dirstack_top == dirstack_size) {
- + dirstack_size *= 2;
- + dirstack = realloc(dirstack,
- + dirstack_size * sizeof *dirstack);
- + }
- + dirstack[dirstack_top++] = strdup(name); /* released by free(top) once popped */
- + }
- + else { /* anything else becomes an index entry */
- + if (index_next == index_size) {
- + index_size *= 2;
- + db_index = realloc(db_index,
- + index_size * sizeof *db_index);
- + }
- +
- + db_index[index_next++].id = strdup(name); /* only id is set here; pos is left unset */
- + }
- + }
- +
- + free(ents[--count]); /* release this dirent and step to the previous one */
- + }
- +
- + if (count != -1) free(ents); /* skip the free when the scan itself failed */
- + free(top);
- + } while (dirstack_top);
- +
- + qsort(db_index, index_next, sizeof *db_index, sysdbcmp); /* case-insensitive order by id */
- +
- + db->index = db_index;
- + db->index_size = index_next;
- +
- + free(dirstack);
- +}
- +
- +#define FAKE_HEADER "1234\0??\1??"
- +#define FAKE_UTC_POS (7 - 4)
- +
- +/* Create a fake data segment for database 'sysdb': the fixed FAKE_HEADER followed
- + * by a 3-byte record (BC flag, country code) for each index entry found in 'info'. */
- +static void fake_data_segment(timelib_tzdb *sysdb, struct location_info **info)
- +{
- +	const size_t header_len = sizeof(FAKE_HEADER) - 1;
- +	size_t n;
- +	char *data, *p;
- +
- +	/* Sized for the worst case: the whole header plus one 3-byte record
- +	 * per index entry.  NOTE(review): the allocation is still unchecked. */
- +	data = malloc(3 * sysdb->index_size + header_len);
- +	memcpy(data, FAKE_HEADER, header_len);
- +	p = data + header_len;
- +
- +	for (n = 0; n < sysdb->index_size; n++) {
- +		const struct location_info *li;
- +		timelib_tzdb_index_entry *ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
- +
- +		/* "UTC" always uses the record embedded in the header. */
- +		if (strcmp(ent->id, "UTC") == 0) {
- +			ent->pos = FAKE_UTC_POS;
- +			continue;
- +		}
- +
- +		/* Lookup the timezone name in the hash table. */
- +		li = find_zone_info(info, ent->id);
- +		if (li) {
- +			/* If found, append the BC byte and the country code; the
- +			 * position is the record's offset in 'data' minus 4. */
- +			ent->pos = (p - data) - 4;
- +			*p++ = '\1';
- +			*p++ = li->code[0];
- +			*p++ = li->code[1];
- +		} else {
- +			/* If not found, the timezone data can point at the header. */
- +			ent->pos = 0;
- +		}
- +	}
- +
- +	sysdb->data = (unsigned char *)data;
- +}
- +
- +/* Returns true if 'st' describes a regular file larger than 20 bytes and, when
- + * 'fd' is non-zero (callers pass 0 to skip this), its data starts with "TZif". */
- +static int is_valid_tzfile(const struct stat *st, int fd)
- +{
- +	if (fd) {
- +		char buf[20];
- +		if (read(fd, buf, sizeof buf) != (ssize_t)sizeof buf) {
- +			return 0;
- +		}
- +		lseek(fd, 0, SEEK_SET); /* rewind; arguments are (fd, offset, whence) */
- +		if (memcmp(buf, "TZif", 4) != 0) {
- +			return 0;
- +		}
- +	}
- +	return S_ISREG(st->st_mode) && st->st_size > 20;
- +}
- +
- +/* Look 'timezone' up case-insensitively in the system index and return the
- + * spelling stored there; without an index or a match, return the argument. */
- +static const char *canonical_tzname(const char *timezone)
- +{
- +	timelib_tzdb_index_entry key, *match;
- +
- +	if (timezonedb_system == NULL) {
- +		return timezone;
- +	}
- +
- +	key.id = (char *)timezone;
- +	match = bsearch(&key, timezonedb_system->index,
- +		timezonedb_system->index_size, sizeof key, sysdbcmp);
- +	if (match == NULL) {
- +		return timezone;
- +	}
- +
- +	return match->id;
- +}
- +
- +/* mmap() the tzfile for 'timezone' from below ZONEINFO_PREFIX.  Returns
- + * the mapping and stores its size in *length, or returns NULL on failure. */
- +static char *map_tzfile(const char *timezone, size_t *length)
- +{
- +	char path[PATH_MAX];
- +	struct stat info;
- +	void *mapping;
- +	int handle;
- +
- +	/* Empty names and names containing ".." are refused outright. */
- +	if (*timezone == '\0' || strstr(timezone, "..")) {
- +		return NULL;
- +	}
- +	snprintf(path, sizeof path, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
- +
- +	handle = open(path, O_RDONLY);
- +	if (handle == -1) {
- +		return NULL;
- +	}
- +	if (fstat(handle, &info) != 0 || !is_valid_tzfile(&info, handle)) {
- +		close(handle);
- +		return NULL;
- +	}
- +
- +	*length = info.st_size;
- +	mapping = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, handle, 0);
- +	close(handle);
- +	return mapping == MAP_FAILED ? NULL : mapping;
- +}
- +
- +#endif
- +
- +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
- {
- int left = 0, right = tzdb->index_size - 1;
- #ifdef HAVE_SETLOCALE
- @@ -341,21 +782,88 @@ static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const
- return 0;
- }
-
- +/* Point *tzf at the data for 'timezone'.  The system database mmap()s the
- + * tzfile, handing the mapping and its size back through 'map' and 'maplen'
- + * (1 on success, 0 on failure); other databases are searched in memory. */
- +static int seek_to_tz_position(const unsigned char **tzf, char *timezone,
- +	char **map, size_t *maplen,
- +	const timelib_tzdb *tzdb)
- +{
- +#ifdef HAVE_SYSTEM_TZDATA
- +	if (tzdb == timezonedb_system) {
- +		char *mapped = map_tzfile(timezone, maplen);
- +
- +		if (mapped != NULL) {
- +			*map = mapped;
- +			*tzf = (unsigned char *)mapped;
- +		}
- +
- +		return mapped != NULL;
- +	}
- +#endif
- +
- +	/* Not the system database: use the in-memory index. */
- +	return inmem_seek_to_tz_position(tzf, timezone, tzdb);
- +}
- +
- const timelib_tzdb *timelib_builtin_db(void)
- {
- +#ifdef HAVE_SYSTEM_TZDATA
- + if (timezonedb_system == NULL) { /* first call: build the system database once and cache it */
- + timelib_tzdb *tmp = malloc(sizeof *tmp); /* NOTE(review): malloc result is not checked */
- +
- + tmp->version = "0.system";
- + tmp->data = NULL;
- + create_zone_index(tmp); /* fills tmp->index and tmp->index_size */
- + system_location_table = create_location_table(); /* NULL when zone.tab cannot be opened */
- + fake_data_segment(tmp, system_location_table); /* sets tmp->data and each index entry's pos */
- + timezonedb_system = tmp;
- + }
- +
- + return timezonedb_system;
- +#else
- return &timezonedb_builtin;
- +#endif
- }
-
- const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
- {
- +#ifdef HAVE_SYSTEM_TZDATA
- +	*count = timelib_builtin_db()->index_size; /* builds the system index on first use, so it is never NULL here */
- +	return timelib_builtin_db()->index; /* already built by the call above */
- +#else
- *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
- return timezonedb_idx_builtin;
- +#endif
- }
-
- int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
- {
- const unsigned char *tzf;
- - return (seek_to_tz_position(&tzf, timezone, tzdb));
- +
- +#ifdef HAVE_SYSTEM_TZDATA
- + if (tzdb == timezonedb_system) { /* system database: validate against zone.tab and the filesystem */
- + char fname[PATH_MAX];
- + struct stat st;
- +
- + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) { /* reject empty names and any name containing ".." */
- + return 0;
- + }
- +
- + if (system_location_table) {
- + if (find_zone_info(system_location_table, timezone) != NULL) {
- + /* listed in the zone.tab cache: accepted without touching the filesystem */
- + return 1;
- + }
- + }
- +
- + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
- +
- + return stat(fname, &st) == 0 && is_valid_tzfile(&st, 0); /* fd 0: only the stat() data is checked, not the file contents */
- + }
- +#endif
- +
- + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb)); /* any other database: look the name up in its in-memory index */
- }
-
- static void skip_64bit_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
- @@ -380,24 +888,54 @@ static void read_64bit_header(const unsigned char **tzf, timelib_tzinfo *tz)
- timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
- {
- const unsigned char *tzf;
- + char *memmap = NULL;
- + size_t maplen;
- timelib_tzinfo *tmp;
- int version;
-
- - if (seek_to_tz_position(&tzf, timezone, tzdb)) {
- + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
- tmp = timelib_tzinfo_ctor(timezone);
-
- version = read_preamble(&tzf, tmp);
- read_header(&tzf, tmp);
- read_transistions(&tzf, tmp);
- read_types(&tzf, tmp);
- - if (version == 2) {
- - skip_64bit_preamble(&tzf, tmp);
- - read_64bit_header(&tzf, tmp);
- - skip_64bit_transistions(&tzf, tmp);
- - skip_64bit_types(&tzf, tmp);
- - skip_posix_string(&tzf, tmp);
- - }
- - read_location(&tzf, tmp);
- +
- +#ifdef HAVE_SYSTEM_TZDATA
- + if (memmap) {
- + const struct location_info *li;
- +
- + /* TZif-style - grok the location info from the system database,
- + * if possible. */
- +
- + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
- + tmp->location.comments = timelib_strdup(li->comment);
- + strncpy(tmp->location.country_code, li->code, 2);
- + tmp->location.longitude = li->longitude;
- + tmp->location.latitude = li->latitude;
- + tmp->bc = 1;
- + }
- + else {
- + strcpy(tmp->location.country_code, "??");
- + tmp->bc = 0;
- + tmp->location.comments = timelib_strdup("");
- + }
- +
- + /* Now done with the mmap segment - discard it. */
- + munmap(memmap, maplen);
- + } else
- +#endif
- + {
- + /* PHP-style - use the embedded info. */
- + if (version == 2) {
- + skip_64bit_preamble(&tzf, tmp);
- + read_64bit_header(&tzf, tmp);
- + skip_64bit_transistions(&tzf, tmp);
- + skip_64bit_types(&tzf, tmp);
- + skip_posix_string(&tzf, tmp);
- + }
- + read_location(&tzf, tmp);
- + }
- } else {
- tmp = NULL;
- }
- diff --git a/ext/date/lib/timelib.m4 b/ext/date/lib/timelib.m4
- index 99bf9fa..4bf7e46 100644
- --- a/ext/date/lib/timelib.m4
- +++ b/ext/date/lib/timelib.m4
- @@ -78,3 +78,16 @@ stdlib.h
-
- dnl Check for strtoll, atoll
- AC_CHECK_FUNCS(strtoll atoll strftime gettimeofday)
- +
- +PHP_ARG_WITH(system-tzdata, for use of system timezone data,
- +[ --with-system-tzdata[=DIR] to specify use of system timezone data],
- +no, no)
- +
- +if test "$PHP_SYSTEM_TZDATA" != "no"; then # option was requested, with or without a directory
- + AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
- +
- + if test "$PHP_SYSTEM_TZDATA" != "yes"; then # presumably a directory was given: record it as the zoneinfo prefix
- + AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
- + [Define for location of system timezone data])
- + fi
- +fi
|