/* iblok.h */
/*
 * Copyright (c) 2003 Gunnar Ritter
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute
 * it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 * claim that you wrote the original software. If you use this software
 * in a product, an acknowledgment in the product documentation would be
 * appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must not be
 * misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source distribution.
 */
/* Sccsid @(#)iblok.h 1.5 (gritter) 7/16/04 */
/*
 * Functions to read a file sequentially.
 */
#include <sys/types.h>	/* for off_t, pid_t */
#include <stdio.h>	/* for EOF */
#include <wchar.h>	/* for wchar_t */
#include <limits.h>	/* for MB_LEN_MAX */
  30. struct iblok {
  31. long long ib_endoff; /* offset of endc from start of file */
  32. char ib_mbuf[MB_LEN_MAX+1]; /* multibyte overflow buffer */
  33. char *ib_mcur; /* next byte to read in ib_mbuf */
  34. char *ib_mend; /* one beyond last byte in ib_mbuf */
  35. char *ib_blk; /* buffered data */
  36. char *ib_cur; /* next character in ib_blk */
  37. char *ib_end; /* one beyond last byte in ib_blk */
  38. int ib_fd; /* input file descriptor */
  39. int ib_errno; /* errno on error, or 0 */
  40. int ib_incompl; /* had an incomplete last line */
  41. int ib_mb_cur_max; /* MB_CUR_MAX at time of ib_alloc() */
  42. int ib_seekable; /* had a successful lseek() */
  43. pid_t ib_pid; /* child from ib_popen() */
  44. unsigned ib_blksize; /* buffer size */
  45. };
  46. /*
  47. * Allocate an input buffer with file descriptor fd. blksize may be
  48. * either the size of a buffer to allocate in ib_blk, or 0 if the
  49. * size is determined automatically. On error, NULL is returned and
  50. * errno indicates the offending error.
  51. */
  52. extern struct iblok *ib_alloc(int fd, unsigned blksize);
  53. /*
  54. * Deallocate the passed input buffer. The file descriptor is not
  55. * closed.
  56. */
  57. extern void ib_free(struct iblok *ip);
  58. /*
  59. * Open file name and do ib_alloc() on the descriptor.
  60. */
  61. extern struct iblok *ib_open(const char *name, unsigned blksize);
  62. /*
  63. * Close the file descriptor in ip and do ib_free(). Return value is
  64. * the result of close().
  65. */
  66. extern int ib_close(struct iblok *ip);
  67. /*
  68. * A workalike of popen(cmd, "r") using iblok facilities.
  69. */
  70. extern struct iblok *ib_popen(const char *cmd, unsigned blksize);
  71. /*
  72. * Close an iblok opened with ib_popen().
  73. */
  74. extern int ib_pclose(struct iblok *ip);
  75. /*
  76. * Read new input buffer. Returns the next character (or EOF) and advances
  77. * ib_cur by one above the bottom of the buffer.
  78. */
  79. extern int ib_read(struct iblok *ip);
  80. /*
  81. * Get next character. Return EOF at end-of-file or read error.
  82. */
  83. #define ib_get(ip) ((ip)->ib_cur < (ip)->ib_end ? *(ip)->ib_cur++ & 0377 :\
  84. ib_read(ip))
  85. /*
  86. * Unget a character. Note that this implementation alters the read buffer.
  87. * Caution: Calling this macro more than once might underflow ib_blk.
  88. */
  89. #define ib_unget(c, ip) (*(--(ip)->ib_cur) = (char)(c))
  90. /*
  91. * Get file offset of last read character.
  92. */
  93. #define ib_offs(ip) ((ip)->ib_endoff - ((ip)->ib_end - (ip)->ib_cur - 1))
  94. /*
  95. * Read a wide character using ib_get() facilities. *wc is used to store
  96. * the wide character, or WEOF if an invalid byte sequence was found.
  97. * The number of bytes consumed is stored in *len. Return value is the
  98. * corresponding byte sequence, or NULL at end-of-file in input.
  99. *
  100. * Note that it is not possible to mix calls to ib_getw() with calls to
  101. * ib_get(), ib_unget() or ib_seek() unless the last character read by
  102. * ib_getw() was L'\n'.
  103. */
  104. extern char *ib_getw(struct iblok *ip, wint_t *wc, int *len);
  105. /*
  106. * Get a line from ip, returning the line length. Further arguments are either
  107. * the pointer to a malloc()ed buffer and a pointer to its size, or (NULL, 0)
  108. * if ib_getlin() shall allocate the buffer itselves. ib_getlin() will use
  109. * the realloc-style function reallc() to increase the buffer if necessary;
  110. * this function is expected never to fail (i. e., it must longjmp() or abort
  111. * if it cannot allocate a buffer of the demanded size).
  112. * On end-of-file or error, 0 is returned.
  113. */
  114. extern size_t ib_getlin(struct iblok *ip, char **line, size_t *alcd,
  115. void *(*reallc)(void *, size_t));
  116. /*
  117. * Like lseek().
  118. */
  119. extern off_t ib_seek(struct iblok *ip, off_t off, int whence);