#!/bin/sh
# vi: ft=awk :
#
# Script to extract functions and external variables off SUS html docs
#
# Copyright (C) 2010 Bernhard Reutner-Fischer
# Public Domain
# Usage:
# wget http://www.opengroup.org/onlinepubs/9699919799/download/susv4.tgz
# tar xzf susv4.tgz
# SUS=susv4 html2input.sh -vFULL_DECLARATIONS=1
# or
# SUS=susv4 html2input.sh -vFULL_DECLARATIONS=0 -vSTDNAME=SUSv4
#
# All command line arguments are handed to awk unchanged.
# Environment:
# SUS   directory of the unpacked SUS tree (default: susv4)
# AWK   awk command to run (default: awk)
# GREP  grep command to run (default: grep)
# Output: one <name>.in (or <name>.<STDNAME>.in) file per matching
# <name>.h.html page, written to the current directory.
#
# Bug in time.h.html of SUSv4:
# It inconsistently reads "as variables" instead of "external variables" that
# is used everywhere except in time.h.html

# The awk program that turns one *.h.html page into its .in file.
# NOTE: the program lives inside a single-quoted shell string, so it must
# never contain a single quote character.
awk_prog='
# Derive the output file name from FILENAME, store it in the global fname
# and create/truncate that file.
# "dir/foo.h.html" yields "foo.in", or "foo.STDNAME.in" if STDNAME is set.
function get_filename(    base, parts) {
  base = FILENAME
  sub(".*/", "", base)
  split(base, parts, ".")
  fname = parts[1]
  if (STDNAME)
    fname = fname "." STDNAME
  fname = fname ".in"
  # Opening with ">" truncates once; the stream then stays open, so all
  # later "print > fname" statements append to this fresh file.
  printf "" > fname
}

# Return l with the markup found in the SUS <pre> blocks removed.
# Only the first occurrence of each known tag is dropped, an option margin
# image is turned into a textual "[Option Start]" / "[Option End]" marker,
# and finally everything from the first "<" to the last ">" is deleted.
function unhtml(l) {
  sub("<tt>", "", l)
  sub("</tt>", "", l)
  sub("<sup>", "", l)
  sub("</sup>", "", l)
  sub("<a [^>]*>", "", l)
  sub("</a>", "", l)
  if (l ~ /<img[^>]*Option[[:space:]][[:space:]]*Start[^>]*>/) {
    sub("<img[^>]*>", "[Option Start]", l)
  } else if (l ~ /<img[^>]*Option[[:space:]][[:space:]]*End[^>]*>/) {
    sub("<img[^>]*>", "[Option End]", l)
  }
  sub("<.*>", "", l)
  return l
}

# Reduce a function declaration to the bare function name.
# The text is returned untouched when FULL_DECLARATIONS is set or when it
# is not a declaration (does not end in ";").
function get_funcname(l,    cnt, words, pieces, n, toks) {
  if (FULL_DECLARATIONS)
    return l
  if (l !~ /;$/)
    return l
  cnt = split(l, words, " ")
  if (cnt >= 2 && words[2] ~ /^\(\*/) {
    # Function returning a function pointer: the name follows the first "(".
    # good enough for signal() and sigset()
    cnt = split(l, pieces, "(")
    if (cnt >= 2)
      l = pieces[2]
  } else {
    # Drop the parameter list.
    sub("\\(.*", "", l)
  }
  # Remove "[", "]" and "*"; "]" comes first so it is taken literally.
  gsub(/[][*]/, "", l)
  n = split(l, toks, " ")
  if (n)
    l = toks[n]
  return l
}

# Reduce a variable declaration to the declared name(s), one per line.
# The text is returned untouched when FULL_DECLARATIONS is set or when it
# is not a declaration (does not end in ";").
function get_varname(l,    n, toks) {
  if (FULL_DECLARATIONS)
    return l
  if (l !~ /;$/)
    return l
  # Glue "a, b, c" together so the whole list is the last blank-separated word.
  gsub(",[[:space:]][[:space:]]*", ",", l)
  sub(";$", "", l)
  n = split(l, toks, " ")
  if (n)
    l = toks[n]
  # Remove "[", "]" and "*"; "]" comes first so it is taken literally.
  gsub(/[][*]/, "", l)
  gsub(",", "\n", l)
  return l
}

# data: 0 = outside, 1 = marker sentence seen, 2 = inside the <pre> after it.
# pending: text collected so far for the current declaration.
BEGIN { data = 0; pending = "" }

NR == 1 { get_filename() }

/shall be declared as functions/ { data = 1; isvar = 0; next }
/shall declare the following as variables/ { data = 1; isvar = 1; next }
/shall declare the following external variables/ { data = 1; isvar = 1; next }
/<pre>/ { data++; next }
/<\/pre>/ { data = 0; next }

data == 2 && fname {
  line = $0
  # Collapse the indentation to one blank before joining continuation lines.
  sub("^[[:space:]][[:space:]]*", " ", line)
  pending = pending line
  text = unhtml(pending)
  if (!text)
    next
  pending = text
  # Keep collecting until the declaration or an option marker is complete.
  if (text !~ /;$/ && text !~ />$/ &&
      text !~ /Option Start\]$/ && text !~ /Option End\]$/)
    next
  if (!isvar)
    pending = get_funcname(pending)
  else
    pending = get_varname(pending)
  if (pending)
    print pending > fname
  pending = ""
}
'

# Process every header page below $SUS/basedefs that contains one of the
# marker sentences. Arguments: awk options, passed through verbatim.
main() {
  : "${SUS:=susv4}"
  : "${AWK:=awk}"
  : "${GREP:=grep}"

  # $GREP and $AWK are left unquoted on purpose so that they may carry
  # options (e.g. AWK="gawk --lint").
  # One grep with three patterns lists each page once, even if it contains
  # several of the marker sentences.
  # shellcheck disable=SC2086
  $GREP -l \
    -e "shall be declared as functions" \
    -e "shall declare the following as variables" \
    -e "shall declare the following external variables" \
    -- "$SUS"/basedefs/*.h.html |
    while IFS= read -r header; do
      # stdin is redirected so awk can never consume the list of pages.
      # shellcheck disable=SC2086
      $AWK "$@" "$awk_prog" "$header" </dev/null
    done
}

main "$@"