3
0

unix_io.c 17 KB


  1. /*
  2. * unix_io.c --- This is the Unix (well, really POSIX) implementation
  3. * of the I/O manager.
  4. *
  5. * Implements a small block cache (CACHE_SIZE slots); writes go straight through to the device only when CHANNEL_FLAGS_WRITETHROUGH is set.
  6. *
  7. * Includes support for Windows NT under Cygwin.
  8. *
  9. * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
  10. * 2002 by Theodore Ts'o.
  11. *
  12. * %Begin-Header%
  13. * This file may be redistributed under the terms of the GNU Public
  14. * License.
  15. * %End-Header%
  16. */
  17. #include <stdio.h>
  18. #include <string.h>
  19. #if HAVE_UNISTD_H
  20. #include <unistd.h>
  21. #endif
  22. #if HAVE_ERRNO_H
  23. #include <errno.h>
  24. #endif
  25. #include <fcntl.h>
  26. #include <time.h>
  27. #ifdef __linux__
  28. #include <sys/utsname.h>
  29. #endif
  30. #if HAVE_SYS_STAT_H
  31. #include <sys/stat.h>
  32. #endif
  33. #if HAVE_SYS_TYPES_H
  34. #include <sys/types.h>
  35. #endif
  36. #if HAVE_SYS_RESOURCE_H
  37. #include <sys/resource.h>
  38. #endif
  39. #include "ext2_fs.h"
  40. #include "ext2fs.h"
  /*
   * Magic-number guard: if the object's `magic' field does not hold `code',
   * return `code' from the *calling* function.  Because the expansion
   * contains a `return', it may only be used inside functions whose return
   * type can carry the code.
   *
   * NOTE(review): if a project header also defines this macro, the
   * replacement list must stay token-for-token identical -- confirm before
   * changing it.
   */
  #define EXT2_CHECK_MAGIC(struct, code) \
      if ((struct)->magic != (code)) return (code)
  /*
   * One slot of the per-channel block cache.
   */
  struct unix_cache {
      char *buf;              /* holds one block's worth of data */
      unsigned long block;    /* number of the block stored in buf */
      int access_time;        /* stamp of the last use; lowest stamp is reused first */
      unsigned dirty:1;       /* buf still has to be written to the device */
      unsigned in_use:1;      /* slot currently describes a valid block */
  };
  /* Number of slots in the block cache of each channel. */
  #define CACHE_SIZE 8

  /*
   * Request sizes, in blocks, above which the cache is bypassed and the
   * device is accessed directly: one knob for writes, one for reads.
   */
  #define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */
  #define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */
  56. struct unix_private_data {
  57. int magic;
  58. int dev;
  59. int flags;
  60. int access_time;
  61. ext2_loff_t offset;
  62. struct unix_cache cache[CACHE_SIZE];
  63. };
  64. static errcode_t unix_open(const char *name, int flags, io_channel *channel);
  65. static errcode_t unix_close(io_channel channel);
  66. static errcode_t unix_set_blksize(io_channel channel, int blksize);
  67. static errcode_t unix_read_blk(io_channel channel, unsigned long block,
  68. int count, void *data);
  69. static errcode_t unix_write_blk(io_channel channel, unsigned long block,
  70. int count, const void *data);
  71. static errcode_t unix_flush(io_channel channel);
  72. static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
  73. int size, const void *data);
  74. static errcode_t unix_set_option(io_channel channel, const char *option,
  75. const char *arg);
  76. static void reuse_cache(io_channel channel, struct unix_private_data *data,
  77. struct unix_cache *cache, unsigned long block);
  /*
   * Decide whether raw reads have to go through a sector-sized bounce
   * buffer.  __FreeBSD_kernel__ is defined by GNU/kFreeBSD; the FreeBSD
   * kernel has no buffered block devices -- everything is raw.
   */
  #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  #define NEED_BOUNCE_BUFFER
  #else
  #undef NEED_BOUNCE_BUFFER
  #endif
  85. static struct struct_io_manager struct_unix_manager = {
  86. EXT2_ET_MAGIC_IO_MANAGER,
  87. "Unix I/O Manager",
  88. unix_open,
  89. unix_close,
  90. unix_set_blksize,
  91. unix_read_blk,
  92. unix_write_blk,
  93. unix_flush,
  94. #ifdef NEED_BOUNCE_BUFFER
  95. 0,
  96. #else
  97. unix_write_byte,
  98. #endif
  99. unix_set_option
  100. };
  101. io_manager unix_io_manager = &struct_unix_manager;
  102. /*
  103. * Here are the raw I/O functions
  104. */
  105. #ifndef NEED_BOUNCE_BUFFER
  106. static errcode_t raw_read_blk(io_channel channel,
  107. struct unix_private_data *data,
  108. unsigned long block,
  109. int count, void *buf)
  110. {
  111. errcode_t retval;
  112. ssize_t size;
  113. ext2_loff_t location;
  114. int actual = 0;
  115. size = (count < 0) ? -count : count * channel->block_size;
  116. location = ((ext2_loff_t) block * channel->block_size) + data->offset;
  117. if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
  118. retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
  119. goto error_out;
  120. }
  121. actual = read(data->dev, buf, size);
  122. if (actual != size) {
  123. if (actual < 0)
  124. actual = 0;
  125. retval = EXT2_ET_SHORT_READ;
  126. goto error_out;
  127. }
  128. return 0;
  129. error_out:
  130. memset((char *) buf+actual, 0, size-actual);
  131. if (channel->read_error)
  132. retval = (channel->read_error)(channel, block, count, buf,
  133. size, actual, retval);
  134. return retval;
  135. }
  136. #else /* NEED_BOUNCE_BUFFER */
  137. /*
  138. * Windows and FreeBSD block devices only allow sector alignment IO in offset and size
  139. */
  140. static errcode_t raw_read_blk(io_channel channel,
  141. struct unix_private_data *data,
  142. unsigned long block,
  143. int count, void *buf)
  144. {
  145. errcode_t retval;
  146. size_t size, alignsize, fragment;
  147. ext2_loff_t location;
  148. int total = 0, actual;
  149. #define BLOCKALIGN 512
  150. char sector[BLOCKALIGN];
  151. size = (count < 0) ? -count : count * channel->block_size;
  152. location = ((ext2_loff_t) block * channel->block_size) + data->offset;
  153. #ifdef DEBUG
  154. printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n",
  155. count, size, block, channel->block_size, location);
  156. #endif
  157. if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
  158. retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
  159. goto error_out;
  160. }
  161. fragment = size % BLOCKALIGN;
  162. alignsize = size - fragment;
  163. if (alignsize) {
  164. actual = read(data->dev, buf, alignsize);
  165. if (actual != alignsize)
  166. goto short_read;
  167. }
  168. if (fragment) {
  169. actual = read(data->dev, sector, BLOCKALIGN);
  170. if (actual != BLOCKALIGN)
  171. goto short_read;
  172. memcpy(buf+alignsize, sector, fragment);
  173. }
  174. return 0;
  175. short_read:
  176. if (actual>0)
  177. total += actual;
  178. retval = EXT2_ET_SHORT_READ;
  179. error_out:
  180. memset((char *) buf+total, 0, size-actual);
  181. if (channel->read_error)
  182. retval = (channel->read_error)(channel, block, count, buf,
  183. size, actual, retval);
  184. return retval;
  185. }
  186. #endif
  187. static errcode_t raw_write_blk(io_channel channel,
  188. struct unix_private_data *data,
  189. unsigned long block,
  190. int count, const void *buf)
  191. {
  192. ssize_t size;
  193. ext2_loff_t location;
  194. int actual = 0;
  195. errcode_t retval;
  196. if (count == 1)
  197. size = channel->block_size;
  198. else {
  199. if (count < 0)
  200. size = -count;
  201. else
  202. size = count * channel->block_size;
  203. }
  204. location = ((ext2_loff_t) block * channel->block_size) + data->offset;
  205. if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
  206. retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
  207. goto error_out;
  208. }
  209. actual = write(data->dev, buf, size);
  210. if (actual != size) {
  211. retval = EXT2_ET_SHORT_WRITE;
  212. goto error_out;
  213. }
  214. return 0;
  215. error_out:
  216. if (channel->write_error)
  217. retval = (channel->write_error)(channel, block, count, buf,
  218. size, actual, retval);
  219. return retval;
  220. }
  221. /*
  222. * Here we implement the cache functions
  223. */
  224. /* Allocate the cache buffers */
  225. static errcode_t alloc_cache(io_channel channel,
  226. struct unix_private_data *data)
  227. {
  228. errcode_t retval;
  229. struct unix_cache *cache;
  230. int i;
  231. data->access_time = 0;
  232. for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
  233. cache->block = 0;
  234. cache->access_time = 0;
  235. cache->dirty = 0;
  236. cache->in_use = 0;
  237. if ((retval = ext2fs_get_mem(channel->block_size,
  238. &cache->buf)))
  239. return retval;
  240. }
  241. return 0;
  242. }
  243. /* Free the cache buffers */
  244. static void free_cache(struct unix_private_data *data)
  245. {
  246. struct unix_cache *cache;
  247. int i;
  248. data->access_time = 0;
  249. for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
  250. cache->block = 0;
  251. cache->access_time = 0;
  252. cache->dirty = 0;
  253. cache->in_use = 0;
  254. if (cache->buf)
  255. ext2fs_free_mem(&cache->buf);
  256. cache->buf = 0;
  257. }
  258. }
  259. #ifndef NO_IO_CACHE
  260. /*
  261. * Try to find a block in the cache. If the block is not found, and
  262. * eldest is a non-zero pointer, then fill in eldest with the cache
  263. * entry to that should be reused.
  264. */
  265. static struct unix_cache *find_cached_block(struct unix_private_data *data,
  266. unsigned long block,
  267. struct unix_cache **eldest)
  268. {
  269. struct unix_cache *cache, *unused_cache, *oldest_cache;
  270. int i;
  271. unused_cache = oldest_cache = 0;
  272. for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
  273. if (!cache->in_use) {
  274. if (!unused_cache)
  275. unused_cache = cache;
  276. continue;
  277. }
  278. if (cache->block == block) {
  279. cache->access_time = ++data->access_time;
  280. return cache;
  281. }
  282. if (!oldest_cache ||
  283. (cache->access_time < oldest_cache->access_time))
  284. oldest_cache = cache;
  285. }
  286. if (eldest)
  287. *eldest = (unused_cache) ? unused_cache : oldest_cache;
  288. return 0;
  289. }
  290. /*
  291. * Reuse a particular cache entry for another block.
  292. */
  293. static void reuse_cache(io_channel channel, struct unix_private_data *data,
  294. struct unix_cache *cache, unsigned long block)
  295. {
  296. if (cache->dirty && cache->in_use)
  297. raw_write_blk(channel, data, cache->block, 1, cache->buf);
  298. cache->in_use = 1;
  299. cache->dirty = 0;
  300. cache->block = block;
  301. cache->access_time = ++data->access_time;
  302. }
  303. /*
  304. * Flush all of the blocks in the cache
  305. */
  306. static errcode_t flush_cached_blocks(io_channel channel,
  307. struct unix_private_data *data,
  308. int invalidate)
  309. {
  310. struct unix_cache *cache;
  311. errcode_t retval, retval2;
  312. int i;
  313. retval2 = 0;
  314. for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
  315. if (!cache->in_use)
  316. continue;
  317. if (invalidate)
  318. cache->in_use = 0;
  319. if (!cache->dirty)
  320. continue;
  321. retval = raw_write_blk(channel, data,
  322. cache->block, 1, cache->buf);
  323. if (retval)
  324. retval2 = retval;
  325. else
  326. cache->dirty = 0;
  327. }
  328. return retval2;
  329. }
  330. #endif /* NO_IO_CACHE */
  331. static errcode_t unix_open(const char *name, int flags, io_channel *channel)
  332. {
  333. io_channel io = NULL;
  334. struct unix_private_data *data = NULL;
  335. errcode_t retval;
  336. int open_flags;
  337. struct stat st;
  338. #ifdef __linux__
  339. struct utsname ut;
  340. #endif
  341. if (name == 0)
  342. return EXT2_ET_BAD_DEVICE_NAME;
  343. retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
  344. if (retval)
  345. return retval;
  346. memset(io, 0, sizeof(struct struct_io_channel));
  347. io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
  348. retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
  349. if (retval)
  350. goto cleanup;
  351. io->manager = unix_io_manager;
  352. retval = ext2fs_get_mem(strlen(name)+1, &io->name);
  353. if (retval)
  354. goto cleanup;
  355. strcpy(io->name, name);
  356. io->private_data = data;
  357. io->block_size = 1024;
  358. io->read_error = 0;
  359. io->write_error = 0;
  360. io->refcount = 1;
  361. memset(data, 0, sizeof(struct unix_private_data));
  362. data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
  363. if ((retval = alloc_cache(io, data)))
  364. goto cleanup;
  365. open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
  366. #ifdef CONFIG_LFS
  367. data->dev = open64(io->name, open_flags);
  368. #else
  369. data->dev = open(io->name, open_flags);
  370. #endif
  371. if (data->dev < 0) {
  372. retval = errno;
  373. goto cleanup;
  374. }
  375. #ifdef __linux__
  376. #undef RLIM_INFINITY
  377. #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
  378. #define RLIM_INFINITY ((unsigned long)(~0UL>>1))
  379. #else
  380. #define RLIM_INFINITY (~0UL)
  381. #endif
  382. /*
  383. * Work around a bug in 2.4.10-2.4.18 kernels where writes to
  384. * block devices are wrongly getting hit by the filesize
  385. * limit. This workaround isn't perfect, since it won't work
  386. * if glibc wasn't built against 2.2 header files. (Sigh.)
  387. *
  388. */
  389. if ((flags & IO_FLAG_RW) &&
  390. (uname(&ut) == 0) &&
  391. ((ut.release[0] == '2') && (ut.release[1] == '.') &&
  392. (ut.release[2] == '4') && (ut.release[3] == '.') &&
  393. (ut.release[4] == '1') && (ut.release[5] >= '0') &&
  394. (ut.release[5] < '8')) &&
  395. (fstat(data->dev, &st) == 0) &&
  396. (S_ISBLK(st.st_mode))) {
  397. struct rlimit rlim;
  398. rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
  399. setrlimit(RLIMIT_FSIZE, &rlim);
  400. getrlimit(RLIMIT_FSIZE, &rlim);
  401. if (((unsigned long) rlim.rlim_cur) <
  402. ((unsigned long) rlim.rlim_max)) {
  403. rlim.rlim_cur = rlim.rlim_max;
  404. setrlimit(RLIMIT_FSIZE, &rlim);
  405. }
  406. }
  407. #endif
  408. *channel = io;
  409. return 0;
  410. cleanup:
  411. if (data) {
  412. free_cache(data);
  413. ext2fs_free_mem(&data);
  414. }
  415. if (io)
  416. ext2fs_free_mem(&io);
  417. return retval;
  418. }
  419. static errcode_t unix_close(io_channel channel)
  420. {
  421. struct unix_private_data *data;
  422. errcode_t retval = 0;
  423. EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  424. data = (struct unix_private_data *) channel->private_data;
  425. EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
  426. if (--channel->refcount > 0)
  427. return 0;
  428. #ifndef NO_IO_CACHE
  429. retval = flush_cached_blocks(channel, data, 0);
  430. #endif
  431. if (close(data->dev) < 0)
  432. retval = errno;
  433. free_cache(data);
  434. ext2fs_free_mem(&channel->private_data);
  435. if (channel->name)
  436. ext2fs_free_mem(&channel->name);
  437. ext2fs_free_mem(&channel);
  438. return retval;
  439. }
  440. static errcode_t unix_set_blksize(io_channel channel, int blksize)
  441. {
  442. struct unix_private_data *data;
  443. errcode_t retval;
  444. EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  445. data = (struct unix_private_data *) channel->private_data;
  446. EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
  447. if (channel->block_size != blksize) {
  448. #ifndef NO_IO_CACHE
  449. if ((retval = flush_cached_blocks(channel, data, 0)))
  450. return retval;
  451. #endif
  452. channel->block_size = blksize;
  453. free_cache(data);
  454. if ((retval = alloc_cache(channel, data)))
  455. return retval;
  456. }
  457. return 0;
  458. }
  459. static errcode_t unix_read_blk(io_channel channel, unsigned long block,
  460. int count, void *buf)
  461. {
  462. struct unix_private_data *data;
  463. struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
  464. errcode_t retval;
  465. char *cp;
  466. int i, j;
  467. EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  468. data = (struct unix_private_data *) channel->private_data;
  469. EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
  470. #ifdef NO_IO_CACHE
  471. return raw_read_blk(channel, data, block, count, buf);
  472. #else
  473. /*
  474. * If we're doing an odd-sized read or a very large read,
  475. * flush out the cache and then do a direct read.
  476. */
  477. if (count < 0 || count > WRITE_DIRECT_SIZE) {
  478. if ((retval = flush_cached_blocks(channel, data, 0)))
  479. return retval;
  480. return raw_read_blk(channel, data, block, count, buf);
  481. }
  482. cp = buf;
  483. while (count > 0) {
  484. /* If it's in the cache, use it! */
  485. if ((cache = find_cached_block(data, block, &reuse[0]))) {
  486. #ifdef DEBUG
  487. printf("Using cached block %d\n", block);
  488. #endif
  489. memcpy(cp, cache->buf, channel->block_size);
  490. count--;
  491. block++;
  492. cp += channel->block_size;
  493. continue;
  494. }
  495. /*
  496. * Find the number of uncached blocks so we can do a
  497. * single read request
  498. */
  499. for (i=1; i < count; i++)
  500. if (find_cached_block(data, block+i, &reuse[i]))
  501. break;
  502. #ifdef DEBUG
  503. printf("Reading %d blocks starting at %d\n", i, block);
  504. #endif
  505. if ((retval = raw_read_blk(channel, data, block, i, cp)))
  506. return retval;
  507. /* Save the results in the cache */
  508. for (j=0; j < i; j++) {
  509. count--;
  510. cache = reuse[j];
  511. reuse_cache(channel, data, cache, block++);
  512. memcpy(cache->buf, cp, channel->block_size);
  513. cp += channel->block_size;
  514. }
  515. }
  516. return 0;
  517. #endif /* NO_IO_CACHE */
  518. }
  519. static errcode_t unix_write_blk(io_channel channel, unsigned long block,
  520. int count, const void *buf)
  521. {
  522. struct unix_private_data *data;
  523. struct unix_cache *cache, *reuse;
  524. errcode_t retval = 0;
  525. const char *cp;
  526. int writethrough;
  527. EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  528. data = (struct unix_private_data *) channel->private_data;
  529. EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
  530. #ifdef NO_IO_CACHE
  531. return raw_write_blk(channel, data, block, count, buf);
  532. #else
  533. /*
  534. * If we're doing an odd-sized write or a very large write,
  535. * flush out the cache completely and then do a direct write.
  536. */
  537. if (count < 0 || count > WRITE_DIRECT_SIZE) {
  538. if ((retval = flush_cached_blocks(channel, data, 1)))
  539. return retval;
  540. return raw_write_blk(channel, data, block, count, buf);
  541. }
  542. /*
  543. * For a moderate-sized multi-block write, first force a write
  544. * if we're in write-through cache mode, and then fill the
  545. * cache with the blocks.
  546. */
  547. writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
  548. if (writethrough)
  549. retval = raw_write_blk(channel, data, block, count, buf);
  550. cp = buf;
  551. while (count > 0) {
  552. cache = find_cached_block(data, block, &reuse);
  553. if (!cache) {
  554. cache = reuse;
  555. reuse_cache(channel, data, cache, block);
  556. }
  557. memcpy(cache->buf, cp, channel->block_size);
  558. cache->dirty = !writethrough;
  559. count--;
  560. block++;
  561. cp += channel->block_size;
  562. }
  563. return retval;
  564. #endif /* NO_IO_CACHE */
  565. }
  566. static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
  567. int size, const void *buf)
  568. {
  569. struct unix_private_data *data;
  570. errcode_t retval = 0;
  571. ssize_t actual;
  572. EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  573. data = (struct unix_private_data *) channel->private_data;
  574. EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
  575. #ifndef NO_IO_CACHE
  576. /*
  577. * Flush out the cache completely
  578. */
  579. if ((retval = flush_cached_blocks(channel, data, 1)))
  580. return retval;
  581. #endif
  582. if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
  583. return errno;
  584. actual = write(data->dev, buf, size);
  585. if (actual != size)
  586. return EXT2_ET_SHORT_WRITE;
  587. return 0;
  588. }
  589. /*
  590. * Flush data buffers to disk.
  591. */
  592. static errcode_t unix_flush(io_channel channel)
  593. {
  594. struct unix_private_data *data;
  595. errcode_t retval = 0;
  596. EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  597. data = (struct unix_private_data *) channel->private_data;
  598. EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
  599. #ifndef NO_IO_CACHE
  600. retval = flush_cached_blocks(channel, data, 0);
  601. #endif
  602. fsync(data->dev);
  603. return retval;
  604. }
  605. static errcode_t unix_set_option(io_channel channel, const char *option,
  606. const char *arg)
  607. {
  608. struct unix_private_data *data;
  609. unsigned long tmp;
  610. char *end;
  611. EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
  612. data = (struct unix_private_data *) channel->private_data;
  613. EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
  614. if (!strcmp(option, "offset")) {
  615. if (!arg)
  616. return EXT2_ET_INVALID_ARGUMENT;
  617. tmp = strtoul(arg, &end, 0);
  618. if (*end)
  619. return EXT2_ET_INVALID_ARGUMENT;
  620. data->offset = tmp;
  621. return 0;
  622. }
  623. return EXT2_ET_INVALID_ARGUMENT;
  624. }