/* rx.c */
/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
This file is part of the librx library.
Librx is free software; you can redistribute it and/or modify it under
the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
Librx is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Library General Public
License along with this software; see the file COPYING.LIB. If not,
write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
02139, USA. */
  15. /* NOTE!!! AIX is so losing it requires this to be the first thing in the
  16. * file.
  17. * Do not put ANYTHING before it!
  18. */
  19. #if !defined (__GNUC__) && defined (_AIX)
  20. #pragma alloca
  21. #endif
  22. /* To make linux happy? */
  23. #ifndef _GNU_SOURCE
  24. #define _GNU_SOURCE
  25. #endif
  26. #include <stdlib.h>
  27. #include <stdio.h>
  28. #include <string.h>
  29. #include <ctype.h>
  30. #ifndef isgraph
  31. #define isgraph(c) (isprint (c) && !isspace (c))
  32. #endif
  33. #ifndef isblank
  34. #define isblank(c) ((c) == ' ' || (c) == '\t')
  35. #endif
  36. #include <sys/types.h>
  37. #undef MAX
  38. #undef MIN
  39. #define MAX(a, b) ((a) > (b) ? (a) : (b))
  40. #define MIN(a, b) ((a) < (b) ? (a) : (b))
  41. typedef char boolean;
  42. #define false 0
  43. #define true 1
  44. #ifndef __GCC__
  45. #undef __inline__
  46. #define __inline__
  47. #endif
  48. /* Emacs already defines alloca, sometimes. */
  49. #ifndef alloca
  50. /* Make alloca work the best possible way. */
  51. #ifdef __GNUC__
  52. #define alloca __builtin_alloca
  53. #else /* not __GNUC__ */
  54. #if HAVE_ALLOCA_H
  55. #include <alloca.h>
  56. #else /* not __GNUC__ or HAVE_ALLOCA_H */
  57. #ifndef _AIX /* Already did AIX, up at the top. */
  58. char *alloca ();
  59. #endif /* not _AIX */
  60. #endif /* not HAVE_ALLOCA_H */
  61. #endif /* not __GNUC__ */
  62. #endif /* not alloca */
  63. /* Memory management and stuff for emacs. */
  64. #define CHARBITS 8
  65. #define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
  66. /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
  67. * use `alloca' instead of `malloc' for the backtracking stack.
  68. *
  69. * Emacs will die miserably if we don't do this.
  70. */
  71. #ifdef REGEX_MALLOC
  72. #define REGEX_ALLOCATE malloc
  73. #else /* not REGEX_MALLOC */
  74. #define REGEX_ALLOCATE alloca
  75. #endif /* not REGEX_MALLOC */
  76. #ifdef RX_WANT_RX_DEFS
  77. #define RX_DECL extern
  78. #define RX_DEF_QUAL
  79. #else
  80. #define RX_WANT_RX_DEFS
  81. #define RX_DECL static
  82. #define RX_DEF_QUAL static
  83. #endif
  84. #include <regex.h>
  85. #undef RX_DECL
  86. #define RX_DECL RX_DEF_QUAL
  87. /*
  88. * Prototypes.
  89. */
  90. #ifdef __STDC__
  91. RX_DECL struct rx_hash_item
  92. *rx_hash_find (struct rx_hash *, unsigned long,
  93. void *, struct rx_hash_rules *);
  94. RX_DECL struct rx_hash_item
  95. *rx_hash_find (struct rx_hash *, unsigned long,
  96. void *, struct rx_hash_rules *);
  97. RX_DECL struct rx_hash_item
  98. *rx_hash_store (struct rx_hash *, unsigned long,
  99. void *, struct rx_hash_rules *);
  100. RX_DECL void rx_hash_free (struct rx_hash_item *,
  101. struct rx_hash_rules *);
  102. RX_DECL void rx_free_hash_table (struct rx_hash *, rx_hash_freefn,
  103. struct rx_hash_rules *);
  104. RX_DECL rx_Bitset
  105. rx_cset (struct rx *);
  106. RX_DECL rx_Bitset
  107. rx_copy_cset (struct rx *, rx_Bitset);
  108. RX_DECL void rx_free_cset (struct rx *, rx_Bitset);
  109. static struct rx_hash_item
  110. *compiler_hash_item_alloc (struct rx_hash_rules *, void *);
  111. static struct rx_hash
  112. *compiler_hash_alloc (struct rx_hash_rules *);
  113. static void compiler_free_hash (struct rx_hash *,
  114. struct rx_hash_rules *);
  115. static void compiler_free_hash_item (struct rx_hash_item *,
  116. struct rx_hash_rules *);
  117. RX_DECL struct rexp_node
  118. *rexp_node (struct rx *, enum rexp_node_type);
  119. RX_DECL struct rexp_node
  120. *rx_mk_r_cset (struct rx *, rx_Bitset);
  121. RX_DECL struct rexp_node
  122. *rx_mk_r_concat (struct rx *, struct rexp_node *,
  123. struct rexp_node *);
  124. RX_DECL struct rexp_node
  125. *rx_mk_r_alternate (struct rx *, struct rexp_node *,
  126. struct rexp_node *);
  127. RX_DECL struct rexp_node
  128. *rx_mk_r_alternate (struct rx *, struct rexp_node *,
  129. struct rexp_node *);
  130. RX_DECL struct rexp_node
  131. *rx_mk_r_opt (struct rx *, struct rexp_node *);
  132. RX_DECL struct rexp_node
  133. *rx_mk_r_star (struct rx *, struct rexp_node *);
  134. RX_DECL struct rexp_node
  135. *rx_mk_r_2phase_star (struct rx *, struct rexp_node *,
  136. struct rexp_node *);
  137. RX_DECL struct rexp_node
  138. *rx_mk_r_side_effect (struct rx *, rx_side_effect);
  139. //RX_DECL struct rexp_node
  140. // *rx_mk_r_data (struct rx *, void *);
  141. RX_DECL void rx_free_rexp (struct rx *, struct rexp_node *);
  142. RX_DECL struct rexp_node
  143. *rx_copy_rexp (struct rx *, struct rexp_node *);
  144. RX_DECL struct rx_nfa_state
  145. *rx_nfa_state (struct rx *);
  146. RX_DECL void rx_free_nfa_state (struct rx_nfa_state *);
  147. RX_DECL struct rx_nfa_state
  148. *rx_id_to_nfa_state (struct rx *, int);
  149. RX_DECL struct rx_nfa_edge
  150. *rx_nfa_edge (struct rx *, enum rx_nfa_etype,
  151. struct rx_nfa_state *,
  152. struct rx_nfa_state *);
  153. RX_DECL void rx_free_nfa_edge (struct rx_nfa_edge *);
  154. static struct rx_possible_future
  155. *rx_possible_future (struct rx *, struct rx_se_list *);
  156. static void rx_free_possible_future (struct rx_possible_future *);
  157. RX_DECL void rx_free_nfa (struct rx *);
  158. RX_DECL int rx_build_nfa (struct rx *, struct rexp_node *,
  159. struct rx_nfa_state **,
  160. struct rx_nfa_state **);
  161. RX_DECL void rx_name_nfa_states (struct rx *);
  162. static int se_list_cmp (void *, void *);
  163. static int se_list_equal (void *, void *);
  164. static struct rx_se_list
  165. *hash_cons_se_prog (struct rx *, struct rx_hash *,
  166. void *, struct rx_se_list *);
  167. static struct rx_se_list
  168. *hash_se_prog (struct rx *, struct rx_hash *,
  169. struct rx_se_list *);
  170. static int nfa_set_cmp (void *, void *);
  171. static int nfa_set_equal (void *, void *);
  172. static struct rx_nfa_state_set
  173. *nfa_set_cons (struct rx *, struct rx_hash *,
  174. struct rx_nfa_state *,
  175. struct rx_nfa_state_set *);
  176. static struct rx_nfa_state_set
  177. *nfa_set_enjoin (struct rx *, struct rx_hash *,
  178. struct rx_nfa_state *,
  179. struct rx_nfa_state_set *);
  180. #endif
  181. #ifndef emacs
  182. #ifdef SYNTAX_TABLE
  183. extern char *re_syntax_table;
  184. #else /* not SYNTAX_TABLE */
  185. #ifndef RX_WANT_RX_DEFS
  186. RX_DECL char re_syntax_table[CHAR_SET_SIZE];
  187. #endif
  188. #ifdef __STDC__
  189. static void
  190. init_syntax_once (void)
  191. #else
  192. static void
  193. init_syntax_once ()
  194. #endif
  195. {
  196. register int c;
  197. static int done = 0;
  198. if (done)
  199. return;
  200. bzero (re_syntax_table, sizeof re_syntax_table);
  201. for (c = 'a'; c <= 'z'; c++)
  202. re_syntax_table[c] = Sword;
  203. for (c = 'A'; c <= 'Z'; c++)
  204. re_syntax_table[c] = Sword;
  205. for (c = '0'; c <= '9'; c++)
  206. re_syntax_table[c] = Sword;
  207. re_syntax_table['_'] = Sword;
  208. done = 1;
  209. }
  210. #endif /* not SYNTAX_TABLE */
  211. #endif /* not emacs */
  212. /* Compile with `-DRX_DEBUG' and use the following flags.
  213. *
  214. * Debugging flags:
 * rx_debug_compile - print information as a regexp is compiled
  216. * rx_debug_trace - print information as a regexp is executed
  217. */
  218. #ifdef RX_DEBUG
  219. int rx_debug_compile = 0;
  220. int rx_debug_trace = 0;
  221. static struct re_pattern_buffer * dbug_rxb = 0;
  222. /*
  223. * More Prototypes
  224. */
  225. #ifdef __STDC__
  226. typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
  227. static void print_cset (struct rx *, rx_Bitset, FILE *);
  228. static void print_rexp (struct rx *, struct rexp_node *, int,
  229. side_effect_printer, FILE *);
  230. static void print_nfa (struct rx *, struct rx_nfa_state *,
  231. side_effect_printer, FILE *);
  232. static void re_seprint (struct rx *, void *, FILE *);
  233. void print_compiled_pattern (struct re_pattern_buffer *);
  234. void print_fastmap (char *);
  235. #else
  236. typedef void (*side_effect_printer) ();
  237. static void print_cset ();
  238. #endif
  239. #ifdef __STDC__
  240. static void
  241. print_rexp (struct rx *rx,
  242. struct rexp_node *node, int depth,
  243. side_effect_printer seprint, FILE * fp)
  244. #else
  245. static void
  246. print_rexp (rx, node, depth, seprint, fp)
  247. struct rx *rx;
  248. struct rexp_node *node;
  249. int depth;
  250. side_effect_printer seprint;
  251. FILE * fp;
  252. #endif
  253. {
  254. if (!node)
  255. return;
  256. else
  257. {
  258. switch (node->type)
  259. {
  260. case r_cset:
  261. {
  262. fprintf (fp, "%*s", depth, "");
  263. print_cset (rx, node->params.cset, fp);
  264. fputc ('\n', fp);
  265. break;
  266. }
  267. case r_opt:
  268. case r_star:
  269. fprintf (fp, "%*s%s\n", depth, "",
  270. node->type == r_opt ? "opt" : "star");
  271. print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
  272. break;
  273. case r_2phase_star:
  274. fprintf (fp, "%*s2phase star\n", depth, "");
  275. print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
  276. print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
  277. break;
  278. case r_alternate:
  279. case r_concat:
  280. fprintf (fp, "%*s%s\n", depth, "",
  281. node->type == r_alternate ? "alt" : "concat");
  282. print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
  283. print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
  284. break;
  285. case r_side_effect:
  286. fprintf (fp, "%*sSide effect: ", depth, "");
  287. seprint (rx, node->params.side_effect, fp);
  288. fputc ('\n', fp);
  289. }
  290. }
  291. }
  292. #ifdef __STDC__
  293. static void
  294. print_nfa (struct rx * rx,
  295. struct rx_nfa_state * n,
  296. side_effect_printer seprint, FILE * fp)
  297. #else
  298. static void
  299. print_nfa (rx, n, seprint, fp)
  300. struct rx * rx;
  301. struct rx_nfa_state * n;
  302. side_effect_printer seprint;
  303. FILE * fp;
  304. #endif
  305. {
  306. while (n)
  307. {
  308. struct rx_nfa_edge *e = n->edges;
  309. struct rx_possible_future *ec = n->futures;
  310. fprintf (fp, "node %d %s\n", n->id,
  311. n->is_final ? "final" : (n->is_start ? "start" : ""));
  312. while (e)
  313. {
  314. fprintf (fp, " edge to %d, ", e->dest->id);
  315. switch (e->type)
  316. {
  317. case ne_epsilon:
  318. fprintf (fp, "epsilon\n");
  319. break;
  320. case ne_side_effect:
  321. fprintf (fp, "side effect ");
  322. seprint (rx, e->params.side_effect, fp);
  323. fputc ('\n', fp);
  324. break;
  325. case ne_cset:
  326. fprintf (fp, "cset ");
  327. print_cset (rx, e->params.cset, fp);
  328. fputc ('\n', fp);
  329. break;
  330. }
  331. e = e->next;
  332. }
  333. while (ec)
  334. {
  335. int x;
  336. struct rx_nfa_state_set * s;
  337. struct rx_se_list * l;
  338. fprintf (fp, " eclosure to {");
  339. for (s = ec->destset; s; s = s->cdr)
  340. fprintf (fp, "%d ", s->car->id);
  341. fprintf (fp, "} (");
  342. for (l = ec->effects; l; l = l->cdr)
  343. {
  344. seprint (rx, l->car, fp);
  345. fputc (' ', fp);
  346. }
  347. fprintf (fp, ")\n");
  348. ec = ec->next;
  349. }
  350. n = n->next;
  351. }
  352. }
  353. static char * efnames [] =
  354. {
  355. "bogon",
  356. "re_se_try",
  357. "re_se_pushback",
  358. "re_se_push0",
  359. "re_se_pushpos",
  360. "re_se_chkpos",
  361. "re_se_poppos",
  362. "re_se_at_dot",
  363. "re_se_syntax",
  364. "re_se_not_syntax",
  365. "re_se_begbuf",
  366. "re_se_hat",
  367. "re_se_wordbeg",
  368. "re_se_wordbound",
  369. "re_se_notwordbound",
  370. "re_se_wordend",
  371. "re_se_endbuf",
  372. "re_se_dollar",
  373. "re_se_fail",
  374. };
  375. static char * efnames2[] =
  376. {
  377. "re_se_win",
  378. "re_se_lparen",
  379. "re_se_rparen",
  380. "re_se_backref",
  381. "re_se_iter",
  382. "re_se_end_iter",
  383. "re_se_tv"
  384. };
  385. static char * inx_names[] =
  386. {
  387. "rx_backtrack_point",
  388. "rx_do_side_effects",
  389. "rx_cache_miss",
  390. "rx_next_char",
  391. "rx_backtrack",
  392. "rx_error_inx",
  393. "rx_num_instructions"
  394. };
  395. #ifdef __STDC__
  396. static void
  397. re_seprint (struct rx * rx, void * effect, FILE * fp)
  398. #else
  399. static void
  400. re_seprint (rx, effect, fp)
  401. struct rx * rx;
  402. void * effect;
  403. FILE * fp;
  404. #endif
  405. {
  406. if ((int)effect < 0)
  407. fputs (efnames[-(int)effect], fp);
  408. else if (dbug_rxb)
  409. {
  410. struct re_se_params * p = &dbug_rxb->se_params[(int)effect];
  411. fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
  412. }
  413. else
  414. fprintf (fp, "[complex op # %d]", (int)effect);
  415. }
  416. /* These are so the regex.c regression tests will compile. */
  417. void
  418. print_compiled_pattern (rxb)
  419. struct re_pattern_buffer * rxb;
  420. {
  421. }
  422. void
  423. print_fastmap (fm)
  424. char * fm;
  425. {
  426. }
  427. #endif /* RX_DEBUG */
/* This page: Bitsets.  Completely uninteresting. */
  429. //RX_DECL int rx_bitset_is_equal (int, rx_Bitset, rx_Bitset);
  430. RX_DECL int rx_bitset_is_subset (int, rx_Bitset, rx_Bitset);
  431. //RX_DECL int rx_bitset_empty (int, rx_Bitset);
  432. RX_DECL void rx_bitset_null (int, rx_Bitset);
  433. RX_DECL void rx_bitset_complement (int, rx_Bitset);
  434. RX_DECL void rx_bitset_complement (int, rx_Bitset);
  435. RX_DECL void rx_bitset_assign (int, rx_Bitset, rx_Bitset);
  436. RX_DECL void rx_bitset_union (int, rx_Bitset, rx_Bitset);
  437. RX_DECL void rx_bitset_intersection (int, rx_Bitset, rx_Bitset);
  438. RX_DECL void rx_bitset_difference (int, rx_Bitset, rx_Bitset);
  439. //RX_DECL void rx_bitset_revdifference (int, rx_Bitset, rx_Bitset);
  440. #ifdef emacs
  441. RX_DECL void rx_bitset_xor (int, rx_Bitset, rx_Bitset);
  442. #endif
  443. RX_DECL unsigned long
  444. rx_bitset_hash (int, rx_Bitset);
  445. #if 0
  446. #ifdef __STDC__
  447. RX_DECL int
  448. rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b)
  449. #else
  450. RX_DECL int
  451. rx_bitset_is_equal (size, a, b)
  452. int size;
  453. rx_Bitset a;
  454. rx_Bitset b;
  455. #endif
  456. {
  457. int x;
  458. RX_subset s = b[0];
  459. b[0] = ~a[0];
  460. for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x)
  461. ;
  462. b[0] = s;
  463. return !x && s == a[0];
  464. }
  465. #endif
  466. #ifdef __STDC__
  467. RX_DECL int
  468. rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b)
  469. #else
  470. RX_DECL int
  471. rx_bitset_is_subset (size, a, b)
  472. int size;
  473. rx_Bitset a;
  474. rx_Bitset b;
  475. #endif
  476. {
  477. int x = rx_bitset_numb_subsets(size) - 1;
  478. while (x-- && (a[x] & b[x]) == a[x]);
  479. return x == -1;
  480. }
  481. #if 0
  482. #ifdef __STDC__
  483. RX_DECL int
  484. rx_bitset_empty (int size, rx_Bitset set)
  485. #else
  486. RX_DECL int
  487. rx_bitset_empty (size, set)
  488. int size;
  489. rx_Bitset set;
  490. #endif
  491. {
  492. int x;
  493. RX_subset s = set[0];
  494. set[0] = 1;
  495. for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x)
  496. ;
  497. set[0] = s;
  498. return !s;
  499. }
  500. #endif
  501. #ifdef __STDC__
  502. RX_DECL void
  503. rx_bitset_null (int size, rx_Bitset b)
  504. #else
  505. RX_DECL void
  506. rx_bitset_null (size, b)
  507. int size;
  508. rx_Bitset b;
  509. #endif
  510. {
  511. bzero (b, rx_sizeof_bitset(size));
  512. }
  513. #ifdef __STDC__
  514. RX_DECL void
  515. rx_bitset_universe (int size, rx_Bitset b)
  516. #else
  517. RX_DECL void
  518. rx_bitset_universe (size, b)
  519. int size;
  520. rx_Bitset b;
  521. #endif
  522. {
  523. int x = rx_bitset_numb_subsets (size);
  524. while (x--)
  525. *b++ = ~(RX_subset)0;
  526. }
  527. #ifdef __STDC__
  528. RX_DECL void
  529. rx_bitset_complement (int size, rx_Bitset b)
  530. #else
  531. RX_DECL void
  532. rx_bitset_complement (size, b)
  533. int size;
  534. rx_Bitset b;
  535. #endif
  536. {
  537. int x = rx_bitset_numb_subsets (size);
  538. while (x--)
  539. {
  540. *b = ~*b;
  541. ++b;
  542. }
  543. }
  544. #ifdef __STDC__
  545. RX_DECL void
  546. rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b)
  547. #else
  548. RX_DECL void
  549. rx_bitset_assign (size, a, b)
  550. int size;
  551. rx_Bitset a;
  552. rx_Bitset b;
  553. #endif
  554. {
  555. int x;
  556. for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
  557. a[x] = b[x];
  558. }
  559. #ifdef __STDC__
  560. RX_DECL void
  561. rx_bitset_union (int size, rx_Bitset a, rx_Bitset b)
  562. #else
  563. RX_DECL void
  564. rx_bitset_union (size, a, b)
  565. int size;
  566. rx_Bitset a;
  567. rx_Bitset b;
  568. #endif
  569. {
  570. int x;
  571. for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
  572. a[x] |= b[x];
  573. }
  574. #ifdef __STDC__
  575. RX_DECL void
  576. rx_bitset_intersection (int size,
  577. rx_Bitset a, rx_Bitset b)
  578. #else
  579. RX_DECL void
  580. rx_bitset_intersection (size, a, b)
  581. int size;
  582. rx_Bitset a;
  583. rx_Bitset b;
  584. #endif
  585. {
  586. int x;
  587. for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
  588. a[x] &= b[x];
  589. }
  590. #ifdef __STDC__
  591. RX_DECL void
  592. rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b)
  593. #else
  594. RX_DECL void
  595. rx_bitset_difference (size, a, b)
  596. int size;
  597. rx_Bitset a;
  598. rx_Bitset b;
  599. #endif
  600. {
  601. int x;
  602. for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
  603. a[x] &= ~ b[x];
  604. }
  605. #if 0
  606. #ifdef __STDC__
  607. RX_DECL void
  608. rx_bitset_revdifference (int size,
  609. rx_Bitset a, rx_Bitset b)
  610. #else
  611. RX_DECL void
  612. rx_bitset_revdifference (size, a, b)
  613. int size;
  614. rx_Bitset a;
  615. rx_Bitset b;
  616. #endif
  617. {
  618. int x;
  619. for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
  620. a[x] = ~a[x] & b[x];
  621. }
  622. #endif
  623. #ifdef emacs
  624. #ifdef __STDC__
  625. RX_DECL void
  626. rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b)
  627. #else
  628. RX_DECL void
  629. rx_bitset_xor (size, a, b)
  630. int size;
  631. rx_Bitset a;
  632. rx_Bitset b;
  633. #endif
  634. {
  635. int x;
  636. for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
  637. a[x] ^= b[x];
  638. }
  639. #endif
  640. #ifdef __STDC__
  641. RX_DECL unsigned long
  642. rx_bitset_hash (int size, rx_Bitset b)
  643. #else
  644. RX_DECL unsigned long
  645. rx_bitset_hash (size, b)
  646. int size;
  647. rx_Bitset b;
  648. #endif
  649. {
  650. int x;
  651. unsigned long hash = (unsigned long)rx_bitset_hash;
  652. for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
  653. hash ^= rx_bitset_subset_val(b, x);
  654. return hash;
  655. }
  656. RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] =
  657. {
  658. 0x1,
  659. 0x2,
  660. 0x4,
  661. 0x8,
  662. 0x10,
  663. 0x20,
  664. 0x40,
  665. 0x80,
  666. 0x100,
  667. 0x200,
  668. 0x400,
  669. 0x800,
  670. 0x1000,
  671. 0x2000,
  672. 0x4000,
  673. 0x8000,
  674. 0x10000,
  675. 0x20000,
  676. 0x40000,
  677. 0x80000,
  678. 0x100000,
  679. 0x200000,
  680. 0x400000,
  681. 0x800000,
  682. 0x1000000,
  683. 0x2000000,
  684. 0x4000000,
  685. 0x8000000,
  686. 0x10000000,
  687. 0x20000000,
  688. 0x40000000,
  689. 0x80000000
  690. };
  691. #ifdef RX_DEBUG
  692. #ifdef __STDC__
  693. static void
  694. print_cset (struct rx *rx, rx_Bitset cset, FILE * fp)
  695. #else
  696. static void
  697. print_cset (rx, cset, fp)
  698. struct rx *rx;
  699. rx_Bitset cset;
  700. FILE * fp;
  701. #endif
  702. {
  703. int x;
  704. fputc ('[', fp);
  705. for (x = 0; x < rx->local_cset_size; ++x)
  706. if (RX_bitset_member (cset, x))
  707. {
  708. if (isprint(x))
  709. fputc (x, fp);
  710. else
  711. fprintf (fp, "\\0%o ", x);
  712. }
  713. fputc (']', fp);
  714. }
  715. #endif /* RX_DEBUG */
  716. static unsigned long rx_hash_masks[4] =
  717. {
  718. 0x12488421,
  719. 0x96699669,
  720. 0xbe7dd7eb,
  721. 0xffffffff
  722. };
  723. /* Hash tables */
  724. #ifdef __STDC__
  725. RX_DECL struct rx_hash_item *
  726. rx_hash_find (struct rx_hash * table,
  727. unsigned long hash,
  728. void * value,
  729. struct rx_hash_rules * rules)
  730. #else
  731. RX_DECL struct rx_hash_item *
  732. rx_hash_find (table, hash, value, rules)
  733. struct rx_hash * table;
  734. unsigned long hash;
  735. void * value;
  736. struct rx_hash_rules * rules;
  737. #endif
  738. {
  739. rx_hash_eq eq = rules->eq;
  740. int maskc = 0;
  741. long mask = rx_hash_masks [0];
  742. int bucket = (hash & mask) % 13;
  743. while (table->children [bucket])
  744. {
  745. table = table->children [bucket];
  746. ++maskc;
  747. mask = rx_hash_masks[maskc];
  748. bucket = (hash & mask) % 13;
  749. }
  750. {
  751. struct rx_hash_item * it = table->buckets[bucket];
  752. while (it)
  753. if (eq (it->data, value))
  754. return it;
  755. else
  756. it = it->next_same_hash;
  757. }
  758. return 0;
  759. }
  760. #ifdef __STDC__
  761. RX_DECL struct rx_hash_item *
  762. rx_hash_store (struct rx_hash * table,
  763. unsigned long hash,
  764. void * value,
  765. struct rx_hash_rules * rules)
  766. #else
  767. RX_DECL struct rx_hash_item *
  768. rx_hash_store (table, hash, value, rules)
  769. struct rx_hash * table;
  770. unsigned long hash;
  771. void * value;
  772. struct rx_hash_rules * rules;
  773. #endif
  774. {
  775. rx_hash_eq eq = rules->eq;
  776. int maskc = 0;
  777. long mask = rx_hash_masks[0];
  778. int bucket = (hash & mask) % 13;
  779. int depth = 0;
  780. while (table->children [bucket])
  781. {
  782. table = table->children [bucket];
  783. ++maskc;
  784. mask = rx_hash_masks[maskc];
  785. bucket = (hash & mask) % 13;
  786. ++depth;
  787. }
  788. {
  789. struct rx_hash_item * it = table->buckets[bucket];
  790. while (it)
  791. if (eq (it->data, value))
  792. return it;
  793. else
  794. it = it->next_same_hash;
  795. }
  796. {
  797. if ( (depth < 3)
  798. && (table->bucket_size [bucket] >= 4))
  799. {
  800. struct rx_hash * newtab = ((struct rx_hash *)
  801. rules->hash_alloc (rules));
  802. if (!newtab)
  803. goto add_to_bucket;
  804. bzero (newtab, sizeof (*newtab));
  805. newtab->parent = table;
  806. {
  807. struct rx_hash_item * them = table->buckets[bucket];
  808. unsigned long newmask = rx_hash_masks[maskc + 1];
  809. while (them)
  810. {
  811. struct rx_hash_item * save = them->next_same_hash;
  812. int new_buck = (them->hash & newmask) % 13;
  813. them->next_same_hash = newtab->buckets[new_buck];
  814. newtab->buckets[new_buck] = them;
  815. them->table = newtab;
  816. them = save;
  817. ++newtab->bucket_size[new_buck];
  818. ++newtab->refs;
  819. }
  820. table->refs = (table->refs - table->bucket_size[bucket] + 1);
  821. table->bucket_size[bucket] = 0;
  822. table->buckets[bucket] = 0;
  823. table->children[bucket] = newtab;
  824. table = newtab;
  825. bucket = (hash & newmask) % 13;
  826. }
  827. }
  828. }
  829. add_to_bucket:
  830. {
  831. struct rx_hash_item * it = ((struct rx_hash_item *)
  832. rules->hash_item_alloc (rules, value));
  833. if (!it)
  834. return 0;
  835. it->hash = hash;
  836. it->table = table;
  837. /* DATA and BINDING are to be set in hash_item_alloc */
  838. it->next_same_hash = table->buckets [bucket];
  839. table->buckets[bucket] = it;
  840. ++table->bucket_size [bucket];
  841. ++table->refs;
  842. return it;
  843. }
  844. }
  845. #ifdef __STDC__
  846. RX_DECL void
  847. rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules)
  848. #else
  849. RX_DECL void
  850. rx_hash_free (it, rules)
  851. struct rx_hash_item * it;
  852. struct rx_hash_rules * rules;
  853. #endif
  854. {
  855. if (it)
  856. {
  857. struct rx_hash * table = it->table;
  858. unsigned long hash = it->hash;
  859. int depth = (table->parent
  860. ? (table->parent->parent
  861. ? (table->parent->parent->parent
  862. ? 3
  863. : 2)
  864. : 1)
  865. : 0);
  866. int bucket = (hash & rx_hash_masks [depth]) % 13;
  867. struct rx_hash_item ** pos = &table->buckets [bucket];
  868. while (*pos != it)
  869. pos = &(*pos)->next_same_hash;
  870. *pos = it->next_same_hash;
  871. rules->free_hash_item (it, rules);
  872. --table->bucket_size[bucket];
  873. --table->refs;
  874. while (!table->refs && depth)
  875. {
  876. struct rx_hash * save = table;
  877. table = table->parent;
  878. --depth;
  879. bucket = (hash & rx_hash_masks [depth]) % 13;
  880. --table->refs;
  881. table->children[bucket] = 0;
  882. rules->free_hash (save, rules);
  883. }
  884. }
  885. }
  886. #ifdef __STDC__
  887. RX_DECL void
  888. rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
  889. struct rx_hash_rules * rules)
  890. #else
  891. RX_DECL void
  892. rx_free_hash_table (tab, freefn, rules)
  893. struct rx_hash * tab;
  894. rx_hash_freefn freefn;
  895. struct rx_hash_rules * rules;
  896. #endif
  897. {
  898. int x;
  899. for (x = 0; x < 13; ++x)
  900. if (tab->children[x])
  901. {
  902. rx_free_hash_table (tab->children[x], freefn, rules);
  903. rules->free_hash (tab->children[x], rules);
  904. }
  905. else
  906. {
  907. struct rx_hash_item * them = tab->buckets[x];
  908. while (them)
  909. {
  910. struct rx_hash_item * that = them;
  911. them = that->next_same_hash;
  912. freefn (that);
  913. rules->free_hash_item (that, rules);
  914. }
  915. }
  916. }
/* Utilities for manipulating bitset representations of character sets. */
  918. #ifdef __STDC__
  919. RX_DECL rx_Bitset
  920. rx_cset (struct rx *rx)
  921. #else
  922. RX_DECL rx_Bitset
  923. rx_cset (rx)
  924. struct rx *rx;
  925. #endif
  926. {
  927. rx_Bitset b = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size));
  928. if (b)
  929. rx_bitset_null (rx->local_cset_size, b);
  930. return b;
  931. }
  932. #ifdef __STDC__
  933. RX_DECL rx_Bitset
  934. rx_copy_cset (struct rx *rx, rx_Bitset a)
  935. #else
  936. RX_DECL rx_Bitset
  937. rx_copy_cset (rx, a)
  938. struct rx *rx;
  939. rx_Bitset a;
  940. #endif
  941. {
  942. rx_Bitset cs = rx_cset (rx);
  943. if (cs)
  944. rx_bitset_union (rx->local_cset_size, cs, a);
  945. return cs;
  946. }
  947. #ifdef __STDC__
  948. RX_DECL void
  949. rx_free_cset (struct rx * rx, rx_Bitset c)
  950. #else
  951. RX_DECL void
  952. rx_free_cset (rx, c)
  953. struct rx * rx;
  954. rx_Bitset c;
  955. #endif
  956. {
  957. if (c)
  958. free ((char *)c);
  959. }
  960. /* Hash table memory allocation policy for the regexp compiler */
  961. #ifdef __STDC__
  962. static struct rx_hash *
  963. compiler_hash_alloc (struct rx_hash_rules * rules)
  964. #else
  965. static struct rx_hash *
  966. compiler_hash_alloc (rules)
  967. struct rx_hash_rules * rules;
  968. #endif
  969. {
  970. return (struct rx_hash *)malloc (sizeof (struct rx_hash));
  971. }
  972. #ifdef __STDC__
  973. static struct rx_hash_item *
  974. compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value)
  975. #else
  976. static struct rx_hash_item *
  977. compiler_hash_item_alloc (rules, value)
  978. struct rx_hash_rules * rules;
  979. void * value;
  980. #endif
  981. {
  982. struct rx_hash_item * it;
  983. it = (struct rx_hash_item *)malloc (sizeof (*it));
  984. if (it)
  985. {
  986. it->data = value;
  987. it->binding = 0;
  988. }
  989. return it;
  990. }
  991. #ifdef __STDC__
  992. static void
  993. compiler_free_hash (struct rx_hash * tab,
  994. struct rx_hash_rules * rules)
  995. #else
  996. static void
  997. compiler_free_hash (tab, rules)
  998. struct rx_hash * tab;
  999. struct rx_hash_rules * rules;
  1000. #endif
  1001. {
  1002. free ((char *)tab);
  1003. }
  1004. #ifdef __STDC__
  1005. static void
  1006. compiler_free_hash_item (struct rx_hash_item * item,
  1007. struct rx_hash_rules * rules)
  1008. #else
  1009. static void
  1010. compiler_free_hash_item (item, rules)
  1011. struct rx_hash_item * item;
  1012. struct rx_hash_rules * rules;
  1013. #endif
  1014. {
  1015. free ((char *)item);
  1016. }
  1017. /* This page: REXP_NODE (expression tree) structures. */
  1018. #ifdef __STDC__
  1019. RX_DECL struct rexp_node *
  1020. rexp_node (struct rx *rx,
  1021. enum rexp_node_type type)
  1022. #else
  1023. RX_DECL struct rexp_node *
  1024. rexp_node (rx, type)
  1025. struct rx *rx;
  1026. enum rexp_node_type type;
  1027. #endif
  1028. {
  1029. struct rexp_node *n;
  1030. n = (struct rexp_node *)malloc (sizeof (*n));
  1031. if (n)
  1032. {
  1033. bzero (n, sizeof (*n));
  1034. n->type = type;
  1035. }
  1036. return n;
  1037. }
  1038. /* free_rexp_node assumes that the bitset passed to rx_mk_r_cset
  1039. * can be freed using rx_free_cset.
  1040. */
  1041. #ifdef __STDC__
  1042. RX_DECL struct rexp_node *
  1043. rx_mk_r_cset (struct rx * rx,
  1044. rx_Bitset b)
  1045. #else
  1046. RX_DECL struct rexp_node *
  1047. rx_mk_r_cset (rx, b)
  1048. struct rx * rx;
  1049. rx_Bitset b;
  1050. #endif
  1051. {
  1052. struct rexp_node * n = rexp_node (rx, r_cset);
  1053. if (n)
  1054. n->params.cset = b;
  1055. return n;
  1056. }
  1057. #ifdef __STDC__
  1058. RX_DECL struct rexp_node *
  1059. rx_mk_r_concat (struct rx * rx,
  1060. struct rexp_node * a,
  1061. struct rexp_node * b)
  1062. #else
  1063. RX_DECL struct rexp_node *
  1064. rx_mk_r_concat (rx, a, b)
  1065. struct rx * rx;
  1066. struct rexp_node * a;
  1067. struct rexp_node * b;
  1068. #endif
  1069. {
  1070. struct rexp_node * n = rexp_node (rx, r_concat);
  1071. if (n)
  1072. {
  1073. n->params.pair.left = a;
  1074. n->params.pair.right = b;
  1075. }
  1076. return n;
  1077. }
  1078. #ifdef __STDC__
  1079. RX_DECL struct rexp_node *
  1080. rx_mk_r_alternate (struct rx * rx,
  1081. struct rexp_node * a,
  1082. struct rexp_node * b)
  1083. #else
  1084. RX_DECL struct rexp_node *
  1085. rx_mk_r_alternate (rx, a, b)
  1086. struct rx * rx;
  1087. struct rexp_node * a;
  1088. struct rexp_node * b;
  1089. #endif
  1090. {
  1091. struct rexp_node * n = rexp_node (rx, r_alternate);
  1092. if (n)
  1093. {
  1094. n->params.pair.left = a;
  1095. n->params.pair.right = b;
  1096. }
  1097. return n;
  1098. }
  1099. #ifdef __STDC__
  1100. RX_DECL struct rexp_node *
  1101. rx_mk_r_opt (struct rx * rx,
  1102. struct rexp_node * a)
  1103. #else
  1104. RX_DECL struct rexp_node *
  1105. rx_mk_r_opt (rx, a)
  1106. struct rx * rx;
  1107. struct rexp_node * a;
  1108. #endif
  1109. {
  1110. struct rexp_node * n = rexp_node (rx, r_opt);
  1111. if (n)
  1112. {
  1113. n->params.pair.left = a;
  1114. n->params.pair.right = 0;
  1115. }
  1116. return n;
  1117. }
  1118. #ifdef __STDC__
  1119. RX_DECL struct rexp_node *
  1120. rx_mk_r_star (struct rx * rx,
  1121. struct rexp_node * a)
  1122. #else
  1123. RX_DECL struct rexp_node *
  1124. rx_mk_r_star (rx, a)
  1125. struct rx * rx;
  1126. struct rexp_node * a;
  1127. #endif
  1128. {
  1129. struct rexp_node * n = rexp_node (rx, r_star);
  1130. if (n)
  1131. {
  1132. n->params.pair.left = a;
  1133. n->params.pair.right = 0;
  1134. }
  1135. return n;
  1136. }
  1137. #ifdef __STDC__
  1138. RX_DECL struct rexp_node *
  1139. rx_mk_r_2phase_star (struct rx * rx,
  1140. struct rexp_node * a,
  1141. struct rexp_node * b)
  1142. #else
  1143. RX_DECL struct rexp_node *
  1144. rx_mk_r_2phase_star (rx, a, b)
  1145. struct rx * rx;
  1146. struct rexp_node * a;
  1147. struct rexp_node * b;
  1148. #endif
  1149. {
  1150. struct rexp_node * n = rexp_node (rx, r_2phase_star);
  1151. if (n)
  1152. {
  1153. n->params.pair.left = a;
  1154. n->params.pair.right = b;
  1155. }
  1156. return n;
  1157. }
  1158. #ifdef __STDC__
  1159. RX_DECL struct rexp_node *
  1160. rx_mk_r_side_effect (struct rx * rx,
  1161. rx_side_effect a)
  1162. #else
  1163. RX_DECL struct rexp_node *
  1164. rx_mk_r_side_effect (rx, a)
  1165. struct rx * rx;
  1166. rx_side_effect a;
  1167. #endif
  1168. {
  1169. struct rexp_node * n = rexp_node (rx, r_side_effect);
  1170. if (n)
  1171. {
  1172. n->params.side_effect = a;
  1173. n->params.pair.right = 0;
  1174. }
  1175. return n;
  1176. }
  1177. #if 0
  1178. #ifdef __STDC__
  1179. RX_DECL struct rexp_node *
  1180. rx_mk_r_data (struct rx * rx,
  1181. void * a)
  1182. #else
  1183. RX_DECL struct rexp_node *
  1184. rx_mk_r_data (rx, a)
  1185. struct rx * rx;
  1186. void * a;
  1187. #endif
  1188. {
  1189. struct rexp_node * n = rexp_node (rx, r_data);
  1190. if (n)
  1191. {
  1192. n->params.pair.left = a;
  1193. n->params.pair.right = 0;
  1194. }
  1195. return n;
  1196. }
  1197. #endif
  1198. #ifdef __STDC__
  1199. RX_DECL void
  1200. rx_free_rexp (struct rx * rx, struct rexp_node * node)
  1201. #else
  1202. RX_DECL void
  1203. rx_free_rexp (rx, node)
  1204. struct rx * rx;
  1205. struct rexp_node * node;
  1206. #endif
  1207. {
  1208. if (node)
  1209. {
  1210. switch (node->type)
  1211. {
  1212. case r_cset:
  1213. if (node->params.cset)
  1214. rx_free_cset (rx, node->params.cset);
  1215. case r_side_effect:
  1216. break;
  1217. case r_concat:
  1218. case r_alternate:
  1219. case r_2phase_star:
  1220. case r_opt:
  1221. case r_star:
  1222. rx_free_rexp (rx, node->params.pair.left);
  1223. rx_free_rexp (rx, node->params.pair.right);
  1224. break;
  1225. case r_data:
  1226. /* This shouldn't occur. */
  1227. break;
  1228. }
  1229. free ((char *)node);
  1230. }
  1231. }
  1232. #ifdef __STDC__
  1233. RX_DECL struct rexp_node *
  1234. rx_copy_rexp (struct rx *rx,
  1235. struct rexp_node *node)
  1236. #else
  1237. RX_DECL struct rexp_node *
  1238. rx_copy_rexp (rx, node)
  1239. struct rx *rx;
  1240. struct rexp_node *node;
  1241. #endif
  1242. {
  1243. if (!node)
  1244. return 0;
  1245. else
  1246. {
  1247. struct rexp_node *n = rexp_node (rx, node->type);
  1248. if (!n)
  1249. return 0;
  1250. switch (node->type)
  1251. {
  1252. case r_cset:
  1253. n->params.cset = rx_copy_cset (rx, node->params.cset);
  1254. if (!n->params.cset)
  1255. {
  1256. rx_free_rexp (rx, n);
  1257. return 0;
  1258. }
  1259. break;
  1260. case r_side_effect:
  1261. n->params.side_effect = node->params.side_effect;
  1262. break;
  1263. case r_concat:
  1264. case r_alternate:
  1265. case r_opt:
  1266. case r_2phase_star:
  1267. case r_star:
  1268. n->params.pair.left =
  1269. rx_copy_rexp (rx, node->params.pair.left);
  1270. n->params.pair.right =
  1271. rx_copy_rexp (rx, node->params.pair.right);
  1272. if ( (node->params.pair.left && !n->params.pair.left)
  1273. || (node->params.pair.right && !n->params.pair.right))
  1274. {
  1275. rx_free_rexp (rx, n);
  1276. return 0;
  1277. }
  1278. break;
  1279. case r_data:
  1280. /* shouldn't happen */
  1281. break;
  1282. }
  1283. return n;
  1284. }
  1285. }
  1286. /* This page: functions to build and destroy graphs that describe nfa's */
  1287. /* Constructs a new nfa node. */
  1288. #ifdef __STDC__
  1289. RX_DECL struct rx_nfa_state *
  1290. rx_nfa_state (struct rx *rx)
  1291. #else
  1292. RX_DECL struct rx_nfa_state *
  1293. rx_nfa_state (rx)
  1294. struct rx *rx;
  1295. #endif
  1296. {
  1297. struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
  1298. if (!n)
  1299. return 0;
  1300. bzero (n, sizeof (*n));
  1301. n->next = rx->nfa_states;
  1302. rx->nfa_states = n;
  1303. return n;
  1304. }
  1305. #ifdef __STDC__
  1306. RX_DECL void
  1307. rx_free_nfa_state (struct rx_nfa_state * n)
  1308. #else
  1309. RX_DECL void
  1310. rx_free_nfa_state (n)
  1311. struct rx_nfa_state * n;
  1312. #endif
  1313. {
  1314. free ((char *)n);
  1315. }
  1316. /* This looks up an nfa node, given a numeric id. Numeric id's are
  1317. * assigned after the nfa has been built.
  1318. */
  1319. #ifdef __STDC__
  1320. RX_DECL struct rx_nfa_state *
  1321. rx_id_to_nfa_state (struct rx * rx,
  1322. int id)
  1323. #else
  1324. RX_DECL struct rx_nfa_state *
  1325. rx_id_to_nfa_state (rx, id)
  1326. struct rx * rx;
  1327. int id;
  1328. #endif
  1329. {
  1330. struct rx_nfa_state * n;
  1331. for (n = rx->nfa_states; n; n = n->next)
  1332. if (n->id == id)
  1333. return n;
  1334. return 0;
  1335. }
  1336. /* This adds an edge between two nodes, but doesn't initialize the
  1337. * edge label.
  1338. */
  1339. #ifdef __STDC__
  1340. RX_DECL struct rx_nfa_edge *
  1341. rx_nfa_edge (struct rx *rx,
  1342. enum rx_nfa_etype type,
  1343. struct rx_nfa_state *start,
  1344. struct rx_nfa_state *dest)
  1345. #else
  1346. RX_DECL struct rx_nfa_edge *
  1347. rx_nfa_edge (rx, type, start, dest)
  1348. struct rx *rx;
  1349. enum rx_nfa_etype type;
  1350. struct rx_nfa_state *start;
  1351. struct rx_nfa_state *dest;
  1352. #endif
  1353. {
  1354. struct rx_nfa_edge *e;
  1355. e = (struct rx_nfa_edge *)malloc (sizeof (*e));
  1356. if (!e)
  1357. return 0;
  1358. e->next = start->edges;
  1359. start->edges = e;
  1360. e->type = type;
  1361. e->dest = dest;
  1362. return e;
  1363. }
  1364. #ifdef __STDC__
  1365. RX_DECL void
  1366. rx_free_nfa_edge (struct rx_nfa_edge * e)
  1367. #else
  1368. RX_DECL void
  1369. rx_free_nfa_edge (e)
  1370. struct rx_nfa_edge * e;
  1371. #endif
  1372. {
  1373. free ((char *)e);
  1374. }
  1375. /* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
  1376. * of an NFA. These are added to an nfa automaticly by eclose_nfa.
  1377. */
  1378. #ifdef __STDC__
  1379. static struct rx_possible_future *
  1380. rx_possible_future (struct rx * rx,
  1381. struct rx_se_list * effects)
  1382. #else
  1383. static struct rx_possible_future *
  1384. rx_possible_future (rx, effects)
  1385. struct rx * rx;
  1386. struct rx_se_list * effects;
  1387. #endif
  1388. {
  1389. struct rx_possible_future *ec;
  1390. ec = (struct rx_possible_future *) malloc (sizeof (*ec));
  1391. if (!ec)
  1392. return 0;
  1393. ec->destset = 0;
  1394. ec->next = 0;
  1395. ec->effects = effects;
  1396. return ec;
  1397. }
  1398. #ifdef __STDC__
  1399. static void
  1400. rx_free_possible_future (struct rx_possible_future * pf)
  1401. #else
  1402. static void
  1403. rx_free_possible_future (pf)
  1404. struct rx_possible_future * pf;
  1405. #endif
  1406. {
  1407. free ((char *)pf);
  1408. }
  1409. #ifdef __STDC__
  1410. RX_DECL void
  1411. rx_free_nfa (struct rx *rx)
  1412. #else
  1413. RX_DECL void
  1414. rx_free_nfa (rx)
  1415. struct rx *rx;
  1416. #endif
  1417. {
  1418. while (rx->nfa_states)
  1419. {
  1420. while (rx->nfa_states->edges)
  1421. {
  1422. switch (rx->nfa_states->edges->type)
  1423. {
  1424. case ne_cset:
  1425. rx_free_cset (rx, rx->nfa_states->edges->params.cset);
  1426. break;
  1427. default:
  1428. break;
  1429. }
  1430. {
  1431. struct rx_nfa_edge * e;
  1432. e = rx->nfa_states->edges;
  1433. rx->nfa_states->edges = rx->nfa_states->edges->next;
  1434. rx_free_nfa_edge (e);
  1435. }
  1436. } /* while (rx->nfa_states->edges) */
  1437. {
  1438. /* Iterate over the partial epsilon closures of rx->nfa_states */
  1439. struct rx_possible_future * pf = rx->nfa_states->futures;
  1440. while (pf)
  1441. {
  1442. struct rx_possible_future * pft = pf;
  1443. pf = pf->next;
  1444. rx_free_possible_future (pft);
  1445. }
  1446. }
  1447. {
  1448. struct rx_nfa_state *n;
  1449. n = rx->nfa_states;
  1450. rx->nfa_states = rx->nfa_states->next;
  1451. rx_free_nfa_state (n);
  1452. }
  1453. }
  1454. }
  1455. /* This page: translating a pattern expression into an nfa and doing the
  1456. * static part of the nfa->super-nfa translation.
  1457. */
  1458. /* This is the thompson regexp->nfa algorithm.
  1459. * It is modified to allow for `side-effect epsilons.' Those are
  1460. * edges that are taken whenever a similar epsilon edge would be,
  1461. * but which imply that some side effect occurs when the edge
  1462. * is taken.
  1463. *
  1464. * Side effects are used to model parts of the pattern langauge
  1465. * that are not regular (in the formal sense).
  1466. */
  1467. #ifdef __STDC__
  1468. RX_DECL int
  1469. rx_build_nfa (struct rx *rx,
  1470. struct rexp_node *rexp,
  1471. struct rx_nfa_state **start,
  1472. struct rx_nfa_state **end)
  1473. #else
  1474. RX_DECL int
  1475. rx_build_nfa (rx, rexp, start, end)
  1476. struct rx *rx;
  1477. struct rexp_node *rexp;
  1478. struct rx_nfa_state **start;
  1479. struct rx_nfa_state **end;
  1480. #endif
  1481. {
  1482. struct rx_nfa_edge *edge;
  1483. /* Start & end nodes may have been allocated by the caller. */
  1484. *start = *start ? *start : rx_nfa_state (rx);
  1485. if (!*start)
  1486. return 0;
  1487. if (!rexp)
  1488. {
  1489. *end = *start;
  1490. return 1;
  1491. }
  1492. *end = *end ? *end : rx_nfa_state (rx);
  1493. if (!*end)
  1494. {
  1495. rx_free_nfa_state (*start);
  1496. return 0;
  1497. }
  1498. switch (rexp->type)
  1499. {
  1500. case r_data:
  1501. return 0;
  1502. case r_cset:
  1503. edge = rx_nfa_edge (rx, ne_cset, *start, *end);
  1504. if (!edge)
  1505. return 0;
  1506. edge->params.cset = rx_copy_cset (rx, rexp->params.cset);
  1507. if (!edge->params.cset)
  1508. {
  1509. rx_free_nfa_edge (edge);
  1510. return 0;
  1511. }
  1512. return 1;
  1513. case r_opt:
  1514. return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
  1515. && rx_nfa_edge (rx, ne_epsilon, *start, *end));
  1516. case r_star:
  1517. {
  1518. struct rx_nfa_state * star_start = 0;
  1519. struct rx_nfa_state * star_end = 0;
  1520. return (rx_build_nfa (rx, rexp->params.pair.left,
  1521. &star_start, &star_end)
  1522. && star_start
  1523. && star_end
  1524. && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
  1525. && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
  1526. && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
  1527. && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
  1528. }
  1529. case r_2phase_star:
  1530. {
  1531. struct rx_nfa_state * star_start = 0;
  1532. struct rx_nfa_state * star_end = 0;
  1533. struct rx_nfa_state * loop_exp_start = 0;
  1534. struct rx_nfa_state * loop_exp_end = 0;
  1535. return (rx_build_nfa (rx, rexp->params.pair.left,
  1536. &star_start, &star_end)
  1537. && rx_build_nfa (rx, rexp->params.pair.right,
  1538. &loop_exp_start, &loop_exp_end)
  1539. && star_start
  1540. && star_end
  1541. && loop_exp_end
  1542. && loop_exp_start
  1543. && rx_nfa_edge (rx, ne_epsilon, star_start, *end)
  1544. && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
  1545. && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
  1546. && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start)
  1547. && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start));
  1548. }
  1549. case r_concat:
  1550. {
  1551. struct rx_nfa_state *shared = 0;
  1552. return
  1553. (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
  1554. && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
  1555. }
  1556. case r_alternate:
  1557. {
  1558. struct rx_nfa_state *ls = 0;
  1559. struct rx_nfa_state *le = 0;
  1560. struct rx_nfa_state *rs = 0;
  1561. struct rx_nfa_state *re = 0;
  1562. return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
  1563. && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
  1564. && rx_nfa_edge (rx, ne_epsilon, *start, ls)
  1565. && rx_nfa_edge (rx, ne_epsilon, *start, rs)
  1566. && rx_nfa_edge (rx, ne_epsilon, le, *end)
  1567. && rx_nfa_edge (rx, ne_epsilon, re, *end));
  1568. }
  1569. case r_side_effect:
  1570. edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
  1571. if (!edge)
  1572. return 0;
  1573. edge->params.side_effect = rexp->params.side_effect;
  1574. return 1;
  1575. }
  1576. /* this should never happen */
  1577. return 0;
  1578. }
  1579. /* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
  1580. * transitions. Only these nodes can occur in super-states.
  1581. * All nodes are given an integer id.
  1582. * The id is non-negative if the node has non-epsilon out-transitions, negative
  1583. * otherwise (this is because we want the non-negative ids to be used as
  1584. * array indexes in a few places).
  1585. */
  1586. #ifdef __STDC__
  1587. RX_DECL void
  1588. rx_name_nfa_states (struct rx *rx)
  1589. #else
  1590. RX_DECL void
  1591. rx_name_nfa_states (rx)
  1592. struct rx *rx;
  1593. #endif
  1594. {
  1595. struct rx_nfa_state *n = rx->nfa_states;
  1596. rx->nodec = 0;
  1597. rx->epsnodec = -1;
  1598. while (n)
  1599. {
  1600. struct rx_nfa_edge *e = n->edges;
  1601. if (n->is_start)
  1602. n->eclosure_needed = 1;
  1603. while (e)
  1604. {
  1605. switch (e->type)
  1606. {
  1607. case ne_epsilon:
  1608. case ne_side_effect:
  1609. break;
  1610. case ne_cset:
  1611. n->id = rx->nodec++;
  1612. {
  1613. struct rx_nfa_edge *from_n = n->edges;
  1614. while (from_n)
  1615. {
  1616. from_n->dest->eclosure_needed = 1;
  1617. from_n = from_n->next;
  1618. }
  1619. }
  1620. goto cont;
  1621. }
  1622. e = e->next;
  1623. }
  1624. n->id = rx->epsnodec--;
  1625. cont:
  1626. n = n->next;
  1627. }
  1628. rx->epsnodec = -rx->epsnodec;
  1629. }
/* This page: data structures for the static part of the nfa->supernfa
 * translation.
 *
 * There are side effect lists -- lists of side effects occurring
 * along an uninterrupted, acyclic path of side-effect epsilon edges.
 * Such paths are collapsed to single edges in the course of computing
 * epsilon closures.  Such single edges are labeled with a list of all
 * the side effects entailed in crossing them.  Like lists of side
 * effects are made == by the constructors below.
 *
 * There are also nfa state sets.  These are used to hold a list of all
 * states reachable from a starting state for a given type of transition
 * and side effect list.  These are also hash-consed.
 */
  1644. /* The next several functions compare, construct, etc. lists of side
  1645. * effects. See ECLOSE_NFA (below) for details.
  1646. */
  1647. /* Ordering of rx_se_list
  1648. * (-1, 0, 1 return value convention).
  1649. */
  1650. #ifdef __STDC__
  1651. static int
  1652. se_list_cmp (void * va, void * vb)
  1653. #else
  1654. static int
  1655. se_list_cmp (va, vb)
  1656. void * va;
  1657. void * vb;
  1658. #endif
  1659. {
  1660. struct rx_se_list * a = (struct rx_se_list *)va;
  1661. struct rx_se_list * b = (struct rx_se_list *)vb;
  1662. return ((va == vb)
  1663. ? 0
  1664. : (!va
  1665. ? -1
  1666. : (!vb
  1667. ? 1
  1668. : ((long)a->car < (long)b->car
  1669. ? 1
  1670. : ((long)a->car > (long)b->car
  1671. ? -1
  1672. : se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
  1673. }
  1674. #ifdef __STDC__
  1675. static int
  1676. se_list_equal (void * va, void * vb)
  1677. #else
  1678. static int
  1679. se_list_equal (va, vb)
  1680. void * va;
  1681. void * vb;
  1682. #endif
  1683. {
  1684. return !(se_list_cmp (va, vb));
  1685. }
/* Hash rules handed to rx_hash_store/rx_hash_find for the memo of
 * side effect lists.  Keys are compared with se_list_equal; the other
 * four entries are the compiler_* hooks (presumably table and item
 * allocation/release, judging by their names -- confirm against the
 * definition of struct rx_hash_rules).
 */
static struct rx_hash_rules se_list_hash_rules =
{
  se_list_equal,
  compiler_hash_alloc,
  compiler_free_hash,
  compiler_hash_item_alloc,
  compiler_free_hash_item
};
  1694. #ifdef __STDC__
  1695. static struct rx_se_list *
  1696. side_effect_cons (struct rx * rx,
  1697. void * se, struct rx_se_list * list)
  1698. #else
  1699. static struct rx_se_list *
  1700. side_effect_cons (rx, se, list)
  1701. struct rx * rx;
  1702. void * se;
  1703. struct rx_se_list * list;
  1704. #endif
  1705. {
  1706. struct rx_se_list * l;
  1707. l = ((struct rx_se_list *) malloc (sizeof (*l)));
  1708. if (!l)
  1709. return 0;
  1710. l->car = se;
  1711. l->cdr = list;
  1712. return l;
  1713. }
  1714. #ifdef __STDC__
  1715. static struct rx_se_list *
  1716. hash_cons_se_prog (struct rx * rx,
  1717. struct rx_hash * memo,
  1718. void * car, struct rx_se_list * cdr)
  1719. #else
  1720. static struct rx_se_list *
  1721. hash_cons_se_prog (rx, memo, car, cdr)
  1722. struct rx * rx;
  1723. struct rx_hash * memo;
  1724. void * car;
  1725. struct rx_se_list * cdr;
  1726. #endif
  1727. {
  1728. long hash = (long)car ^ (long)cdr;
  1729. struct rx_se_list template;
  1730. template.car = car;
  1731. template.cdr = cdr;
  1732. {
  1733. struct rx_hash_item * it = rx_hash_store (memo, hash,
  1734. (void *)&template,
  1735. &se_list_hash_rules);
  1736. if (!it)
  1737. return 0;
  1738. if (it->data == (void *)&template)
  1739. {
  1740. struct rx_se_list * consed;
  1741. consed = (struct rx_se_list *) malloc (sizeof (*consed));
  1742. if (! consed)
  1743. {
  1744. free ((char *)it);
  1745. return 0;
  1746. }
  1747. *consed = template;
  1748. it->data = (void *)consed;
  1749. }
  1750. return (struct rx_se_list *)it->data;
  1751. }
  1752. }
  1753. #ifdef __STDC__
  1754. static struct rx_se_list *
  1755. hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
  1756. #else
  1757. static struct rx_se_list *
  1758. hash_se_prog (rx, memo, prog)
  1759. struct rx * rx;
  1760. struct rx_hash * memo;
  1761. struct rx_se_list * prog;
  1762. #endif
  1763. {
  1764. struct rx_se_list * answer = 0;
  1765. while (prog)
  1766. {
  1767. answer = hash_cons_se_prog (rx, memo, prog->car, answer);
  1768. if (!answer)
  1769. return 0;
  1770. prog = prog->cdr;
  1771. }
  1772. return answer;
  1773. }
  1774. #ifdef __STDC__
  1775. static int
  1776. nfa_set_cmp (void * va, void * vb)
  1777. #else
  1778. static int
  1779. nfa_set_cmp (va, vb)
  1780. void * va;
  1781. void * vb;
  1782. #endif
  1783. {
  1784. struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
  1785. struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
  1786. return ((va == vb)
  1787. ? 0
  1788. : (!va
  1789. ? -1
  1790. : (!vb
  1791. ? 1
  1792. : (a->car->id < b->car->id
  1793. ? 1
  1794. : (a->car->id > b->car->id
  1795. ? -1
  1796. : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
  1797. }
  1798. #ifdef __STDC__
  1799. static int
  1800. nfa_set_equal (void * va, void * vb)
  1801. #else
  1802. static int
  1803. nfa_set_equal (va, vb)
  1804. void * va;
  1805. void * vb;
  1806. #endif
  1807. {
  1808. return !nfa_set_cmp (va, vb);
  1809. }
/* Hash rules handed to rx_hash_store for the memo of nfa state sets.
 * Keys are compared with nfa_set_equal; the other four entries are the
 * compiler_* hooks (presumably table and item allocation/release,
 * judging by their names -- confirm against the definition of
 * struct rx_hash_rules).
 */
static struct rx_hash_rules nfa_set_hash_rules =
{
  nfa_set_equal,
  compiler_hash_alloc,
  compiler_free_hash,
  compiler_hash_item_alloc,
  compiler_free_hash_item
};
  1818. #ifdef __STDC__
  1819. static struct rx_nfa_state_set *
  1820. nfa_set_cons (struct rx * rx,
  1821. struct rx_hash * memo, struct rx_nfa_state * state,
  1822. struct rx_nfa_state_set * set)
  1823. #else
  1824. static struct rx_nfa_state_set *
  1825. nfa_set_cons (rx, memo, state, set)
  1826. struct rx * rx;
  1827. struct rx_hash * memo;
  1828. struct rx_nfa_state * state;
  1829. struct rx_nfa_state_set * set;
  1830. #endif
  1831. {
  1832. struct rx_nfa_state_set template;
  1833. struct rx_hash_item * node;
  1834. template.car = state;
  1835. template.cdr = set;
  1836. node = rx_hash_store (memo,
  1837. (((long)state) >> 8) ^ (long)set,
  1838. &template, &nfa_set_hash_rules);
  1839. if (!node)
  1840. return 0;
  1841. if (node->data == &template)
  1842. {
  1843. struct rx_nfa_state_set * l;
  1844. l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
  1845. node->data = (void *) l;
  1846. if (!l)
  1847. return 0;
  1848. *l = template;
  1849. }
  1850. return (struct rx_nfa_state_set *)node->data;
  1851. }
  1852. #ifdef __STDC__
  1853. static struct rx_nfa_state_set *
  1854. nfa_set_enjoin (struct rx * rx,
  1855. struct rx_hash * memo, struct rx_nfa_state * state,
  1856. struct rx_nfa_state_set * set)
  1857. #else
  1858. static struct rx_nfa_state_set *
  1859. nfa_set_enjoin (rx, memo, state, set)
  1860. struct rx * rx;
  1861. struct rx_hash * memo;
  1862. struct rx_nfa_state * state;
  1863. struct rx_nfa_state_set * set;
  1864. #endif
  1865. {
  1866. if (!set || state->id < set->car->id)
  1867. return nfa_set_cons (rx, memo, state, set);
  1868. if (state->id == set->car->id)
  1869. return set;
  1870. else
  1871. {
  1872. struct rx_nfa_state_set * newcdr
  1873. = nfa_set_enjoin (rx, memo, state, set->cdr);
  1874. if (newcdr != set->cdr)
  1875. set = nfa_set_cons (rx, memo, set->car, newcdr);
  1876. return set;
  1877. }
  1878. }
  1879. /* This page: computing epsilon closures. The closures aren't total.
  1880. * Each node's closures are partitioned according to the side effects entailed
  1881. * along the epsilon edges. Return true on success.
  1882. */
/* State threaded through the eclose_node recursion. */
struct eclose_frame
{
  /* Side effects crossed on the current path, most recent first:
   * eclose_node conses one on before following a side-effect edge and
   * pops it again afterwards.
   */
  struct rx_se_list *prog_backwards;
};
  1887. static int eclose_node (struct rx *, struct rx_nfa_state *,
  1888. struct rx_nfa_state *,
  1889. struct eclose_frame *);
  1890. RX_DECL int rx_eclose_nfa (struct rx *);
  1891. RX_DECL void rx_delete_epsilon_transitions
  1892. (struct rx *);
  1893. static int nfacmp (void *, void *);
  1894. static int count_hash_nodes (struct rx_hash *);
  1895. static void nfa_set_freer (struct rx_hash_item *);
  1896. RX_DECL int rx_compactify_nfa (struct rx *, void **,
  1897. unsigned long *);
  1898. static char *rx_cache_malloc (struct rx_cache *, int);
  1899. static void rx_cache_free (struct rx_cache *,
  1900. struct rx_freelist **, char *);
  1901. static void install_transition (struct rx_superstate *,
  1902. struct rx_inx *, rx_Bitset);
  1903. static int qlen (struct rx_superstate *);
  1904. static void check_cache (struct rx_cache *);
  1905. static void semifree_superstate (struct rx_cache *);
  1906. static void refresh_semifree_superstate
  1907. (struct rx_cache *,
  1908. struct rx_superstate *);
  1909. static void rx_refresh_this_superstate
  1910. (struct rx_cache *,
  1911. struct rx_superstate *);
  1912. static void release_superset_low (struct rx_cache *,
  1913. struct rx_superset *);
  1914. RX_DECL void rx_release_superset (struct rx *, struct rx_superset *);
  1915. static int rx_really_free_superstate (struct rx_cache *);
  1916. static char *rx_cache_get (struct rx_cache *,
  1917. struct rx_freelist **);
  1918. static char *rx_cache_malloc_or_get (struct rx_cache *,
  1919. struct rx_freelist **, int);
  1920. static char *rx_cache_get_superstate (struct rx_cache *);
  1921. static int supersetcmp (void *, void *);
  1922. static struct rx_hash_item
  1923. *superset_allocator (struct rx_hash_rules *, void *);
  1924. static struct rx_hash
  1925. *super_hash_allocator (struct rx_hash_rules *);
  1926. static void super_hash_liberator (struct rx_hash *,
  1927. struct rx_hash_rules *);
  1928. static void superset_hash_item_liberator
  1929. (struct rx_hash_item *,
  1930. struct rx_hash_rules *);
  1931. static int bytes_for_cache_size (int, int);
  1932. static void rx_morecore (struct rx_cache *);
  1933. RX_DECL struct rx_superset
  1934. *rx_superset_cons (struct rx *, struct rx_nfa_state *,
  1935. struct rx_superset *);
  1936. RX_DECL struct rx_superset
  1937. *rx_superstate_eclosure_union
  1938. (struct rx *, struct rx_superset *,
  1939. struct rx_nfa_state_set *);
  1940. static struct rx_distinct_future
  1941. *include_futures (struct rx *,
  1942. struct rx_distinct_future *,
  1943. struct rx_nfa_state *,
  1944. struct rx_superstate *);
  1945. RX_DECL struct rx_superstate
  1946. *rx_superstate (struct rx *, struct rx_superset *);
  1947. static int solve_destination (struct rx *,
  1948. struct rx_distinct_future *);
  1949. static int compute_super_edge (struct rx *,
  1950. struct rx_distinct_future **,
  1951. rx_Bitset, struct rx_superstate *,
  1952. unsigned char);
  1953. static struct rx_super_edge
  1954. *rx_super_edge (struct rx *, struct rx_superstate *,
  1955. rx_Bitset,
  1956. struct rx_distinct_future *);
  1957. static void install_partial_transition
  1958. (struct rx_superstate *,
  1959. struct rx_inx *, RX_subset, int);
  1960. RX_DECL struct rx_inx
  1961. *rx_handle_cache_miss (struct rx *, struct rx_superstate *,
  1962. unsigned char, void *);
  1963. static boolean
  1964. at_begline_loc_p (__const__ char *, __const__ char *,
  1965. reg_syntax_t);
  1966. static boolean
  1967. at_endline_loc_p (__const__ char *, __const__ char *,
  1968. int);
  1969. static rx_Bitset
  1970. inverse_translation (struct re_pattern_buffer *, char *,
  1971. rx_Bitset, unsigned char *, int);
  1972. #ifdef __STDC__
  1973. static int
  1974. eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
  1975. struct rx_nfa_state *node, struct eclose_frame *frame)
  1976. #else
  1977. static int
  1978. eclose_node (rx, outnode, node, frame)
  1979. struct rx *rx;
  1980. struct rx_nfa_state *outnode;
  1981. struct rx_nfa_state *node;
  1982. struct eclose_frame *frame;
  1983. #endif
  1984. {
  1985. struct rx_nfa_edge *e = node->edges;
  1986. /* For each node, we follow all epsilon paths to build the closure.
  1987. * The closure omits nodes that have only epsilon edges.
  1988. * The closure is split into partial closures -- all the states in
  1989. * a partial closure are reached by crossing the same list of
  1990. * of side effects (though not necessarily the same path).
  1991. */
  1992. if (node->mark)
  1993. return 1;
  1994. node->mark = 1;
  1995. if (node->id >= 0 || node->is_final)
  1996. {
  1997. struct rx_possible_future **ec;
  1998. struct rx_se_list * prog_in_order
  1999. = ((struct rx_se_list *)hash_se_prog (rx,
  2000. &rx->se_list_memo,
  2001. frame->prog_backwards));
  2002. int cmp;
  2003. ec = &outnode->futures;
  2004. while (*ec)
  2005. {
  2006. cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
  2007. if (cmp <= 0)
  2008. break;
  2009. ec = &(*ec)->next;
  2010. }
  2011. if (!*ec || (cmp < 0))
  2012. {
  2013. struct rx_possible_future * saved = *ec;
  2014. *ec = rx_possible_future (rx, prog_in_order);
  2015. (*ec)->next = saved;
  2016. if (!*ec)
  2017. return 0;
  2018. }
  2019. if (node->id >= 0)
  2020. {
  2021. (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
  2022. node, (*ec)->destset);
  2023. if (!(*ec)->destset)
  2024. return 0;
  2025. }
  2026. }
  2027. while (e)
  2028. {
  2029. switch (e->type)
  2030. {
  2031. case ne_epsilon:
  2032. if (!eclose_node (rx, outnode, e->dest, frame))
  2033. return 0;
  2034. break;
  2035. case ne_side_effect:
  2036. {
  2037. frame->prog_backwards = side_effect_cons (rx,
  2038. e->params.side_effect,
  2039. frame->prog_backwards);
  2040. if (!frame->prog_backwards)
  2041. return 0;
  2042. if (!eclose_node (rx, outnode, e->dest, frame))
  2043. return 0;
  2044. {
  2045. struct rx_se_list * dying = frame->prog_backwards;
  2046. frame->prog_backwards = frame->prog_backwards->cdr;
  2047. free ((char *)dying);
  2048. }
  2049. break;
  2050. }
  2051. default:
  2052. break;
  2053. }
  2054. e = e->next;
  2055. }
  2056. node->mark = 0;
  2057. return 1;
  2058. }
  2059. #ifdef __STDC__
  2060. RX_DECL int
  2061. rx_eclose_nfa (struct rx *rx)
  2062. #else
  2063. RX_DECL int
  2064. rx_eclose_nfa (rx)
  2065. struct rx *rx;
  2066. #endif
  2067. {
  2068. struct rx_nfa_state *n = rx->nfa_states;
  2069. struct eclose_frame frame;
  2070. static int rx_id = 0;
  2071. frame.prog_backwards = 0;
  2072. rx->rx_id = rx_id++;
  2073. bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
  2074. bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
  2075. while (n)
  2076. {
  2077. n->futures = 0;
  2078. if (n->eclosure_needed && !eclose_node (rx, n, n, &frame))
  2079. return 0;
  2080. /* clear_marks (rx); */
  2081. n = n->next;
  2082. }
  2083. return 1;
  2084. }
  2085. /* This deletes epsilon edges from an NFA. After running eclose_node,
  2086. * we have no more need for these edges. They are removed to simplify
  2087. * further operations on the NFA.
  2088. */
  2089. #ifdef __STDC__
  2090. RX_DECL void
  2091. rx_delete_epsilon_transitions (struct rx *rx)
  2092. #else
  2093. RX_DECL void
  2094. rx_delete_epsilon_transitions (rx)
  2095. struct rx *rx;
  2096. #endif
  2097. {
  2098. struct rx_nfa_state *n = rx->nfa_states;
  2099. struct rx_nfa_edge **e;
  2100. while (n)
  2101. {
  2102. e = &n->edges;
  2103. while (*e)
  2104. {
  2105. struct rx_nfa_edge *t;
  2106. switch ((*e)->type)
  2107. {
  2108. case ne_epsilon:
  2109. case ne_side_effect:
  2110. t = *e;
  2111. *e = t->next;
  2112. rx_free_nfa_edge (t);
  2113. break;
  2114. default:
  2115. e = &(*e)->next;
  2116. break;
  2117. }
  2118. }
  2119. n = n->next;
  2120. }
  2121. }
  2122. /* This page: storing the nfa in a contiguous region of memory for
  2123. * subsequent conversion to a super-nfa.
  2124. */
  2125. /* This is for qsort on an array of nfa_states. The order
  2126. * is based on state ids and goes
  2127. * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
  2128. * This way, positive ids double as array indices.
  2129. */
  2130. #ifdef __STDC__
  2131. static int
  2132. nfacmp (void * va, void * vb)
  2133. #else
  2134. static int
  2135. nfacmp (va, vb)
  2136. void * va;
  2137. void * vb;
  2138. #endif
  2139. {
  2140. struct rx_nfa_state **a = (struct rx_nfa_state **)va;
  2141. struct rx_nfa_state **b = (struct rx_nfa_state **)vb;
  2142. return (*a == *b /* &&&& 3.18 */
  2143. ? 0
  2144. : (((*a)->id < 0) == ((*b)->id < 0)
  2145. ? (((*a)->id < (*b)->id) ? -1 : 1)
  2146. : (((*a)->id < 0)
  2147. ? 1 : -1)));
  2148. }
  2149. #ifdef __STDC__
  2150. static int
  2151. count_hash_nodes (struct rx_hash * st)
  2152. #else
  2153. static int
  2154. count_hash_nodes (st)
  2155. struct rx_hash * st;
  2156. #endif
  2157. {
  2158. int x;
  2159. int count = 0;
  2160. for (x = 0; x < 13; ++x)
  2161. count += ((st->children[x])
  2162. ? count_hash_nodes (st->children[x])
  2163. : st->bucket_size[x]);
  2164. return count;
  2165. }
  2166. #ifdef __STDC__
  2167. static void
  2168. se_memo_freer (struct rx_hash_item * node)
  2169. #else
  2170. static void
  2171. se_memo_freer (node)
  2172. struct rx_hash_item * node;
  2173. #endif
  2174. {
  2175. free ((char *)node->data);
  2176. }
  2177. #ifdef __STDC__
  2178. static void
  2179. nfa_set_freer (struct rx_hash_item * node)
  2180. #else
  2181. static void
  2182. nfa_set_freer (node)
  2183. struct rx_hash_item * node;
  2184. #endif
  2185. {
  2186. free ((char *)node->data);
  2187. }
/* This copies an entire NFA into a single malloced block of memory.
 * Mostly this is for compatibility with regex.c, though it is convenient
 * to have the nfa nodes in an array.
 */
  2192. #ifdef __STDC__
  2193. RX_DECL int
  2194. rx_compactify_nfa (struct rx *rx,
  2195. void **mem, unsigned long *size)
  2196. #else
  2197. RX_DECL int
  2198. rx_compactify_nfa (rx, mem, size)
  2199. struct rx *rx;
  2200. void **mem;
  2201. unsigned long *size;
  2202. #endif
  2203. {
  2204. int total_nodec;
  2205. struct rx_nfa_state *n;
  2206. int edgec = 0;
  2207. int eclosec = 0;
  2208. int se_list_consc = count_hash_nodes (&rx->se_list_memo);
  2209. int nfa_setc = count_hash_nodes (&rx->set_list_memo);
  2210. unsigned long total_size;
  2211. /* This takes place in two stages. First, the total size of the
  2212. * nfa is computed, then structures are copied.
  2213. */
  2214. n = rx->nfa_states;
  2215. total_nodec = 0;
  2216. while (n)
  2217. {
  2218. struct rx_nfa_edge *e = n->edges;
  2219. struct rx_possible_future *ec = n->futures;
  2220. ++total_nodec;
  2221. while (e)
  2222. {
  2223. ++edgec;
  2224. e = e->next;
  2225. }
  2226. while (ec)
  2227. {
  2228. ++eclosec;
  2229. ec = ec->next;
  2230. }
  2231. n = n->next;
  2232. }
  2233. total_size = (total_nodec * sizeof (struct rx_nfa_state)
  2234. + edgec * rx_sizeof_bitset (rx->local_cset_size)
  2235. + edgec * sizeof (struct rx_nfa_edge)
  2236. + nfa_setc * sizeof (struct rx_nfa_state_set)
  2237. + eclosec * sizeof (struct rx_possible_future)
  2238. + se_list_consc * sizeof (struct rx_se_list)
  2239. + rx->reserved);
  2240. if (total_size > *size)
  2241. {
  2242. *mem = remalloc (*mem, total_size);
  2243. if (*mem)
  2244. *size = total_size;
  2245. else
  2246. return 0;
  2247. }
  2248. /* Now we've allocated the memory; this copies the NFA. */
  2249. {
  2250. static struct rx_nfa_state **scratch = 0;
  2251. static int scratch_alloc = 0;
  2252. struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem;
  2253. struct rx_nfa_state *new_state = state_base;
  2254. struct rx_nfa_edge *new_edge =
  2255. (struct rx_nfa_edge *)
  2256. ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state));
  2257. struct rx_se_list * new_se_list =
  2258. (struct rx_se_list *)
  2259. ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge));
  2260. struct rx_possible_future *new_close =
  2261. ((struct rx_possible_future *)
  2262. ((char *) new_se_list
  2263. + se_list_consc * sizeof (struct rx_se_list)));
  2264. struct rx_nfa_state_set * new_nfa_set =
  2265. ((struct rx_nfa_state_set *)
  2266. ((char *)new_close + eclosec * sizeof (struct rx_possible_future)));
  2267. char *new_bitset =
  2268. ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set));
  2269. int x;
  2270. struct rx_nfa_state *n;
  2271. if (scratch_alloc < total_nodec)
  2272. {
  2273. scratch = ((struct rx_nfa_state **)
  2274. remalloc (scratch, total_nodec * sizeof (*scratch)));
  2275. if (scratch)
  2276. scratch_alloc = total_nodec;
  2277. else
  2278. {
  2279. scratch_alloc = 0;
  2280. return 0;
  2281. }
  2282. }
  2283. for (x = 0, n = rx->nfa_states; n; n = n->next)
  2284. scratch[x++] = n;
  2285. qsort (scratch, total_nodec, sizeof (struct rx_nfa_state *),
  2286. (__compar_fn_t)nfacmp);
  2287. for (x = 0; x < total_nodec; ++x)
  2288. {
  2289. struct rx_possible_future *eclose = scratch[x]->futures;
  2290. struct rx_nfa_edge *edge = scratch[x]->edges;
  2291. struct rx_nfa_state *cn = new_state++;
  2292. cn->futures = 0;
  2293. cn->edges = 0;
  2294. cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
  2295. cn->id = scratch[x]->id;
  2296. cn->is_final = scratch[x]->is_final;
  2297. cn->is_start = scratch[x]->is_start;
  2298. cn->mark = 0;
  2299. while (edge)
  2300. {
  2301. int indx = (edge->dest->id < 0
  2302. ? (total_nodec + edge->dest->id)
  2303. : edge->dest->id);
  2304. struct rx_nfa_edge *e = new_edge++;
  2305. rx_Bitset cset = (rx_Bitset) new_bitset;
  2306. new_bitset += rx_sizeof_bitset (rx->local_cset_size);
  2307. rx_bitset_null (rx->local_cset_size, cset);
  2308. rx_bitset_union (rx->local_cset_size, cset, edge->params.cset);
  2309. e->next = cn->edges;
  2310. cn->edges = e;
  2311. e->type = edge->type;
  2312. e->dest = state_base + indx;
  2313. e->params.cset = cset;
  2314. edge = edge->next;
  2315. }
  2316. while (eclose)
  2317. {
  2318. struct rx_possible_future *ec = new_close++;
  2319. struct rx_hash_item * sp;
  2320. struct rx_se_list ** sepos;
  2321. struct rx_se_list * sesrc;
  2322. struct rx_nfa_state_set * destlst;
  2323. struct rx_nfa_state_set ** destpos;
  2324. ec->next = cn->futures;
  2325. cn->futures = ec;
  2326. for (sepos = &ec->effects, sesrc = eclose->effects;
  2327. sesrc;
  2328. sesrc = sesrc->cdr, sepos = &(*sepos)->cdr)
  2329. {
  2330. sp = rx_hash_find (&rx->se_list_memo,
  2331. (long)sesrc->car ^ (long)sesrc->cdr,
  2332. sesrc, &se_list_hash_rules);
  2333. if (sp->binding)
  2334. {
  2335. sesrc = (struct rx_se_list *)sp->binding;
  2336. break;
  2337. }
  2338. *new_se_list = *sesrc;
  2339. sp->binding = (void *)new_se_list;
  2340. *sepos = new_se_list;
  2341. ++new_se_list;
  2342. }
  2343. *sepos = sesrc;
  2344. for (destpos = &ec->destset, destlst = eclose->destset;
  2345. destlst;
  2346. destpos = &(*destpos)->cdr, destlst = destlst->cdr)
  2347. {
  2348. sp = rx_hash_find (&rx->set_list_memo,
  2349. ((((long)destlst->car) >> 8)
  2350. ^ (long)destlst->cdr),
  2351. destlst, &nfa_set_hash_rules);
  2352. if (sp->binding)
  2353. {
  2354. destlst = (struct rx_nfa_state_set *)sp->binding;
  2355. break;
  2356. }
  2357. *new_nfa_set = *destlst;
  2358. new_nfa_set->car = state_base + destlst->car->id;
  2359. sp->binding = (void *)new_nfa_set;
  2360. *destpos = new_nfa_set;
  2361. ++new_nfa_set;
  2362. }
  2363. *destpos = destlst;
  2364. eclose = eclose->next;
  2365. }
  2366. }
  2367. }
  2368. rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
  2369. bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
  2370. rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
  2371. bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
  2372. rx_free_nfa (rx);
  2373. rx->nfa_states = (struct rx_nfa_state *)*mem;
  2374. return 1;
  2375. }
  /* The functions in the next several pages define the lazy NFA conversion
   * used by matchers.  The input to this construction is an NFA such as
   * the one built by compactify_nfa (rx.c).  The output is the superNFA.
   */
  2380. /* Match engines can use arbitrary values for opcodes. So, the parse tree
  2381. * is built using instructions names (enum rx_opcode), but the superstate
  2382. * nfa is populated with mystery opcodes (void *).
  2383. *
  2384. * For convenience, here is an id table. The opcodes are == to their inxs
  2385. *
  2386. * The lables in re_search_2 would make good values for instructions.
  2387. */
  2388. void * rx_id_instruction_table[rx_num_instructions] =
  2389. {
  2390. (void *) rx_backtrack_point,
  2391. (void *) rx_do_side_effects,
  2392. (void *) rx_cache_miss,
  2393. (void *) rx_next_char,
  2394. (void *) rx_backtrack,
  2395. (void *) rx_error_inx
  2396. };
  /* Memory management for superstate graphs. */
  2398. #ifdef __STDC__
  2399. static char *
  2400. rx_cache_malloc (struct rx_cache * cache, int bytes)
  2401. #else
  2402. static char *
  2403. rx_cache_malloc (cache, bytes)
  2404. struct rx_cache * cache;
  2405. int bytes;
  2406. #endif
  2407. {
  2408. while (cache->bytes_left < bytes)
  2409. {
  2410. if (cache->memory_pos)
  2411. cache->memory_pos = cache->memory_pos->next;
  2412. if (!cache->memory_pos)
  2413. {
  2414. cache->morecore (cache);
  2415. if (!cache->memory_pos)
  2416. return 0;
  2417. }
  2418. cache->bytes_left = cache->memory_pos->bytes;
  2419. cache->memory_addr = ((char *)cache->memory_pos
  2420. + sizeof (struct rx_blocklist));
  2421. }
  2422. cache->bytes_left -= bytes;
  2423. {
  2424. char * addr = cache->memory_addr;
  2425. cache->memory_addr += bytes;
  2426. return addr;
  2427. }
  2428. }
  2429. #ifdef __STDC__
  2430. static void
  2431. rx_cache_free (struct rx_cache * cache,
  2432. struct rx_freelist ** freelist, char * mem)
  2433. #else
  2434. static void
  2435. rx_cache_free (cache, freelist, mem)
  2436. struct rx_cache * cache;
  2437. struct rx_freelist ** freelist;
  2438. char * mem;
  2439. #endif
  2440. {
  2441. struct rx_freelist * it = (struct rx_freelist *)mem;
  2442. it->next = *freelist;
  2443. *freelist = it;
  2444. }
  2445. /* The partially instantiated superstate graph has a transition
  2446. * table at every node. There is one entry for every character.
  2447. * This fills in the transition for a set.
  2448. */
  2449. #ifdef __STDC__
  2450. static void
  2451. install_transition (struct rx_superstate *super,
  2452. struct rx_inx *answer, rx_Bitset trcset)
  2453. #else
  2454. static void
  2455. install_transition (super, answer, trcset)
  2456. struct rx_superstate *super;
  2457. struct rx_inx *answer;
  2458. rx_Bitset trcset;
  2459. #endif
  2460. {
  2461. struct rx_inx * transitions = super->transitions;
  2462. int chr;
  2463. for (chr = 0; chr < 256; )
  2464. if (!*trcset)
  2465. {
  2466. ++trcset;
  2467. chr += 32;
  2468. }
  2469. else
  2470. {
  2471. RX_subset sub = *trcset;
  2472. RX_subset mask = 1;
  2473. int bound = chr + 32;
  2474. while (chr < bound)
  2475. {
  2476. if (sub & mask)
  2477. transitions [chr] = *answer;
  2478. ++chr;
  2479. mask <<= 1;
  2480. }
  2481. ++trcset;
  2482. }
  2483. }
  2484. #ifdef __STDC__
  2485. static int
  2486. qlen (struct rx_superstate * q)
  2487. #else
  2488. static int
  2489. qlen (q)
  2490. struct rx_superstate * q;
  2491. #endif
  2492. {
  2493. int count = 1;
  2494. struct rx_superstate * it;
  2495. if (!q)
  2496. return 0;
  2497. for (it = q->next_recyclable; it != q; it = it->next_recyclable)
  2498. ++count;
  2499. return count;
  2500. }
  2501. #ifdef __STDC__
  2502. static void
  2503. check_cache (struct rx_cache * cache)
  2504. #else
  2505. static void
  2506. check_cache (cache)
  2507. struct rx_cache * cache;
  2508. #endif
  2509. {
  2510. struct rx_cache * you_fucked_up = 0;
  2511. int total = cache->superstates;
  2512. int semi = cache->semifree_superstates;
  2513. if (semi != qlen (cache->semifree_superstate))
  2514. check_cache (you_fucked_up);
  2515. if ((total - semi) != qlen (cache->lru_superstate))
  2516. check_cache (you_fucked_up);
  2517. }
  2518. /* When a superstate is old and neglected, it can enter a
  2519. * semi-free state. A semi-free state is slated to die.
  2520. * Incoming transitions to a semi-free state are re-written
  2521. * to cause an (interpreted) fault when they are taken.
  2522. * The fault handler revives the semi-free state, patches
  2523. * incoming transitions back to normal, and continues.
  2524. *
  2525. * The idea is basicly to free in two stages, aborting
  2526. * between the two if the state turns out to be useful again.
  2527. * When a free is aborted, the rescued superstate is placed
  2528. * in the most-favored slot to maximize the time until it
  2529. * is next semi-freed.
  2530. */
  2531. #ifdef __STDC__
  2532. static void
  2533. semifree_superstate (struct rx_cache * cache)
  2534. #else
  2535. static void
  2536. semifree_superstate (cache)
  2537. struct rx_cache * cache;
  2538. #endif
  2539. {
  2540. int disqualified = cache->semifree_superstates;
  2541. if (disqualified == cache->superstates)
  2542. return;
  2543. while (cache->lru_superstate->locks)
  2544. {
  2545. cache->lru_superstate = cache->lru_superstate->next_recyclable;
  2546. ++disqualified;
  2547. if (disqualified == cache->superstates)
  2548. return;
  2549. }
  2550. {
  2551. struct rx_superstate * it = cache->lru_superstate;
  2552. it->next_recyclable->prev_recyclable = it->prev_recyclable;
  2553. it->prev_recyclable->next_recyclable = it->next_recyclable;
  2554. cache->lru_superstate = (it == it->next_recyclable
  2555. ? 0
  2556. : it->next_recyclable);
  2557. if (!cache->semifree_superstate)
  2558. {
  2559. cache->semifree_superstate = it;
  2560. it->next_recyclable = it;
  2561. it->prev_recyclable = it;
  2562. }
  2563. else
  2564. {
  2565. it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
  2566. it->next_recyclable = cache->semifree_superstate;
  2567. it->prev_recyclable->next_recyclable = it;
  2568. it->next_recyclable->prev_recyclable = it;
  2569. }
  2570. {
  2571. struct rx_distinct_future *df;
  2572. it->is_semifree = 1;
  2573. ++cache->semifree_superstates;
  2574. df = it->transition_refs;
  2575. if (df)
  2576. {
  2577. df->prev_same_dest->next_same_dest = 0;
  2578. for (df = it->transition_refs; df; df = df->next_same_dest)
  2579. {
  2580. df->future_frame.inx = cache->instruction_table[rx_cache_miss];
  2581. df->future_frame.data = 0;
  2582. df->future_frame.data_2 = (void *) df;
  2583. /* If there are any NEXT-CHAR instruction frames that
  2584. * refer to this state, we convert them to CACHE-MISS frames.
  2585. */
  2586. if (!df->effects
  2587. && (df->edge->options->next_same_super_edge[0]
  2588. == df->edge->options))
  2589. install_transition (df->present, &df->future_frame,
  2590. df->edge->cset);
  2591. }
  2592. df = it->transition_refs;
  2593. df->prev_same_dest->next_same_dest = df;
  2594. }
  2595. }
  2596. }
  2597. }
  2598. #ifdef __STDC__
  2599. static void
  2600. refresh_semifree_superstate (struct rx_cache * cache,
  2601. struct rx_superstate * super)
  2602. #else
  2603. static void
  2604. refresh_semifree_superstate (cache, super)
  2605. struct rx_cache * cache;
  2606. struct rx_superstate * super;
  2607. #endif
  2608. {
  2609. struct rx_distinct_future *df;
  2610. if (super->transition_refs)
  2611. {
  2612. super->transition_refs->prev_same_dest->next_same_dest = 0;
  2613. for (df = super->transition_refs; df; df = df->next_same_dest)
  2614. {
  2615. df->future_frame.inx = cache->instruction_table[rx_next_char];
  2616. df->future_frame.data = (void *) super->transitions;
  2617. /* CACHE-MISS instruction frames that refer to this state,
  2618. * must be converted to NEXT-CHAR frames.
  2619. */
  2620. if (!df->effects
  2621. && (df->edge->options->next_same_super_edge[0]
  2622. == df->edge->options))
  2623. install_transition (df->present, &df->future_frame,
  2624. df->edge->cset);
  2625. }
  2626. super->transition_refs->prev_same_dest->next_same_dest
  2627. = super->transition_refs;
  2628. }
  2629. if (cache->semifree_superstate == super)
  2630. cache->semifree_superstate = (super->prev_recyclable == super
  2631. ? 0
  2632. : super->prev_recyclable);
  2633. super->next_recyclable->prev_recyclable = super->prev_recyclable;
  2634. super->prev_recyclable->next_recyclable = super->next_recyclable;
  2635. if (!cache->lru_superstate)
  2636. (cache->lru_superstate
  2637. = super->next_recyclable
  2638. = super->prev_recyclable
  2639. = super);
  2640. else
  2641. {
  2642. super->next_recyclable = cache->lru_superstate;
  2643. super->prev_recyclable = cache->lru_superstate->prev_recyclable;
  2644. super->next_recyclable->prev_recyclable = super;
  2645. super->prev_recyclable->next_recyclable = super;
  2646. }
  2647. super->is_semifree = 0;
  2648. --cache->semifree_superstates;
  2649. }
  2650. #ifdef __STDC__
  2651. static void
  2652. rx_refresh_this_superstate (struct rx_cache * cache, struct rx_superstate * superstate)
  2653. #else
  2654. static void
  2655. rx_refresh_this_superstate (cache, superstate)
  2656. struct rx_cache * cache;
  2657. struct rx_superstate * superstate;
  2658. #endif
  2659. {
  2660. if (superstate->is_semifree)
  2661. refresh_semifree_superstate (cache, superstate);
  2662. else if (cache->lru_superstate == superstate)
  2663. cache->lru_superstate = superstate->next_recyclable;
  2664. else if (superstate != cache->lru_superstate->prev_recyclable)
  2665. {
  2666. superstate->next_recyclable->prev_recyclable
  2667. = superstate->prev_recyclable;
  2668. superstate->prev_recyclable->next_recyclable
  2669. = superstate->next_recyclable;
  2670. superstate->next_recyclable = cache->lru_superstate;
  2671. superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
  2672. superstate->next_recyclable->prev_recyclable = superstate;
  2673. superstate->prev_recyclable->next_recyclable = superstate;
  2674. }
  2675. }
  2676. #ifdef __STDC__
  2677. static void
  2678. release_superset_low (struct rx_cache * cache,
  2679. struct rx_superset *set)
  2680. #else
  2681. static void
  2682. release_superset_low (cache, set)
  2683. struct rx_cache * cache;
  2684. struct rx_superset *set;
  2685. #endif
  2686. {
  2687. if (!--set->refs)
  2688. {
  2689. if (set->cdr)
  2690. release_superset_low (cache, set->cdr);
  2691. set->starts_for = 0;
  2692. rx_hash_free
  2693. (rx_hash_find
  2694. (&cache->superset_table,
  2695. (unsigned long)set->car ^ set->id ^ (unsigned long)set->cdr,
  2696. (void *)set,
  2697. &cache->superset_hash_rules),
  2698. &cache->superset_hash_rules);
  2699. rx_cache_free (cache, &cache->free_supersets, (char *)set);
  2700. }
  2701. }
  2702. #ifdef __STDC__
  2703. RX_DECL void
  2704. rx_release_superset (struct rx *rx,
  2705. struct rx_superset *set)
  2706. #else
  2707. RX_DECL void
  2708. rx_release_superset (rx, set)
  2709. struct rx *rx;
  2710. struct rx_superset *set;
  2711. #endif
  2712. {
  2713. release_superset_low (rx->cache, set);
  2714. }
  2715. /* This tries to add a new superstate to the superstate freelist.
  2716. * It might, as a result, free some edge pieces or hash tables.
  2717. * If nothing can be freed because too many locks are being held, fail.
  2718. */
  2719. #ifdef __STDC__
  2720. static int
  2721. rx_really_free_superstate (struct rx_cache * cache)
  2722. #else
  2723. static int
  2724. rx_really_free_superstate (cache)
  2725. struct rx_cache * cache;
  2726. #endif
  2727. {
  2728. int locked_superstates = 0;
  2729. struct rx_superstate * it;
  2730. if (!cache->superstates)
  2731. return 0;
  2732. {
  2733. /* This is a total guess. The idea is that we should expect as
  2734. * many misses as we've recently experienced. I.e., cache->misses
  2735. * should be the same as cache->semifree_superstates.
  2736. */
  2737. while ((cache->hits + cache->misses) > cache->superstates_allowed)
  2738. {
  2739. cache->hits >>= 1;
  2740. cache->misses >>= 1;
  2741. }
  2742. if ( ((cache->hits + cache->misses) * cache->semifree_superstates)
  2743. < (cache->superstates * cache->misses))
  2744. {
  2745. semifree_superstate (cache);
  2746. semifree_superstate (cache);
  2747. }
  2748. }
  2749. while (cache->semifree_superstate && cache->semifree_superstate->locks)
  2750. {
  2751. refresh_semifree_superstate (cache, cache->semifree_superstate);
  2752. ++locked_superstates;
  2753. if (locked_superstates == cache->superstates)
  2754. return 0;
  2755. }
  2756. if (cache->semifree_superstate)
  2757. {
  2758. it = cache->semifree_superstate;
  2759. it->next_recyclable->prev_recyclable = it->prev_recyclable;
  2760. it->prev_recyclable->next_recyclable = it->next_recyclable;
  2761. cache->semifree_superstate = ((it == it->next_recyclable)
  2762. ? 0
  2763. : it->next_recyclable);
  2764. --cache->semifree_superstates;
  2765. }
  2766. else
  2767. {
  2768. while (cache->lru_superstate->locks)
  2769. {
  2770. cache->lru_superstate = cache->lru_superstate->next_recyclable;
  2771. ++locked_superstates;
  2772. if (locked_superstates == cache->superstates)
  2773. return 0;
  2774. }
  2775. it = cache->lru_superstate;
  2776. it->next_recyclable->prev_recyclable = it->prev_recyclable;
  2777. it->prev_recyclable->next_recyclable = it->next_recyclable;
  2778. cache->lru_superstate = ((it == it->next_recyclable)
  2779. ? 0
  2780. : it->next_recyclable);
  2781. }
  2782. if (it->transition_refs)
  2783. {
  2784. struct rx_distinct_future *df;
  2785. for (df = it->transition_refs,
  2786. df->prev_same_dest->next_same_dest = 0;
  2787. df;
  2788. df = df->next_same_dest)
  2789. {
  2790. df->future_frame.inx = cache->instruction_table[rx_cache_miss];
  2791. df->future_frame.data = 0;
  2792. df->future_frame.data_2 = (void *) df;
  2793. df->future = 0;
  2794. }
  2795. it->transition_refs->prev_same_dest->next_same_dest =
  2796. it->transition_refs;
  2797. }
  2798. {
  2799. struct rx_super_edge *tc = it->edges;
  2800. while (tc)
  2801. {
  2802. struct rx_distinct_future * df;
  2803. struct rx_super_edge *tct = tc->next;
  2804. df = tc->options;
  2805. df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
  2806. while (df)
  2807. {
  2808. struct rx_distinct_future *dft = df;
  2809. df = df->next_same_super_edge[0];
  2810. if (dft->future && dft->future->transition_refs == dft)
  2811. {
  2812. dft->future->transition_refs = dft->next_same_dest;
  2813. if (dft->future->transition_refs == dft)
  2814. dft->future->transition_refs = 0;
  2815. }
  2816. dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
  2817. dft->prev_same_dest->next_same_dest = dft->next_same_dest;
  2818. rx_cache_free (cache, &cache->free_discernable_futures,
  2819. (char *)dft);
  2820. }
  2821. rx_cache_free (cache, &cache->free_transition_classes, (char *)tc);
  2822. tc = tct;
  2823. }
  2824. }
  2825. if (it->contents->superstate == it)
  2826. it->contents->superstate = 0;
  2827. release_superset_low (cache, it->contents);
  2828. rx_cache_free (cache, &cache->free_superstates, (char *)it);
  2829. --cache->superstates;
  2830. return 1;
  2831. }
  2832. #ifdef __STDC__
  2833. static char *
  2834. rx_cache_get (struct rx_cache * cache,
  2835. struct rx_freelist ** freelist)
  2836. #else
  2837. static char *
  2838. rx_cache_get (cache, freelist)
  2839. struct rx_cache * cache;
  2840. struct rx_freelist ** freelist;
  2841. #endif
  2842. {
  2843. while (!*freelist && rx_really_free_superstate (cache))
  2844. ;
  2845. if (!*freelist)
  2846. return 0;
  2847. {
  2848. struct rx_freelist * it = *freelist;
  2849. *freelist = it->next;
  2850. return (char *)it;
  2851. }
  2852. }
  /* Obtain an object of BYTES bytes.
   *
   * A non-empty *FREELIST is always used first.  When it is empty, fresh
   * memory is tried via rx_cache_malloc; only if that fails does the
   * request fall back to rx_cache_get, which may free superstates to
   * refill the list.
   */
  #ifdef __STDC__
  static char *
  rx_cache_malloc_or_get (struct rx_cache * cache,
                          struct rx_freelist ** freelist, int bytes)
  #else
  static char *
  rx_cache_malloc_or_get (cache, freelist, bytes)
       struct rx_cache * cache;
       struct rx_freelist ** freelist;
       int bytes;
  #endif
  {
    char * fresh;

    if (*freelist)
      return rx_cache_get (cache, freelist);

    fresh = rx_cache_malloc (cache, bytes);
    if (fresh)
      return fresh;
    return rx_cache_get (cache, freelist);
  }
  2873. #ifdef __STDC__
  2874. static char *
  2875. rx_cache_get_superstate (struct rx_cache * cache)
  2876. #else
  2877. static char *
  2878. rx_cache_get_superstate (cache)
  2879. struct rx_cache * cache;
  2880. #endif
  2881. {
  2882. char * answer;
  2883. int bytes = ( sizeof (struct rx_superstate)
  2884. + cache->local_cset_size * sizeof (struct rx_inx));
  2885. if (!cache->free_superstates
  2886. && (cache->superstates < cache->superstates_allowed))
  2887. {
  2888. answer = rx_cache_malloc (cache, bytes);
  2889. if (answer)
  2890. {
  2891. ++cache->superstates;
  2892. return answer;
  2893. }
  2894. }
  2895. answer = rx_cache_get (cache, &cache->free_superstates);
  2896. if (!answer)
  2897. {
  2898. answer = rx_cache_malloc (cache, bytes);
  2899. if (answer)
  2900. ++cache->superstates_allowed;
  2901. }
  2902. ++cache->superstates;
  2903. return answer;
  2904. }
  2905. #ifdef __STDC__
  2906. static int
  2907. supersetcmp (void * va, void * vb)
  2908. #else
  2909. static int
  2910. supersetcmp (va, vb)
  2911. void * va;
  2912. void * vb;
  2913. #endif
  2914. {
  2915. struct rx_superset * a = (struct rx_superset *)va;
  2916. struct rx_superset * b = (struct rx_superset *)vb;
  2917. return ( (a == b)
  2918. || (a && b && (a->car == b->car) && (a->cdr == b->cdr)));
  2919. }
  2920. #ifdef __STDC__
  2921. static struct rx_hash_item *
  2922. superset_allocator (struct rx_hash_rules * rules, void * val)
  2923. #else
  2924. static struct rx_hash_item *
  2925. superset_allocator (rules, val)
  2926. struct rx_hash_rules * rules;
  2927. void * val;
  2928. #endif
  2929. {
  2930. struct rx_cache * cache
  2931. = ((struct rx_cache *)
  2932. ((char *)rules
  2933. - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
  2934. struct rx_superset * template = (struct rx_superset *)val;
  2935. struct rx_superset * newset
  2936. = ((struct rx_superset *)
  2937. rx_cache_malloc_or_get (cache,
  2938. &cache->free_supersets,
  2939. sizeof (*template)));
  2940. if (!newset)
  2941. return 0;
  2942. newset->refs = 0;
  2943. newset->car = template->car;
  2944. newset->id = template->car->id;
  2945. newset->cdr = template->cdr;
  2946. newset->superstate = 0;
  2947. rx_protect_superset (rx, template->cdr);
  2948. newset->hash_item.data = (void *)newset;
  2949. newset->hash_item.binding = 0;
  2950. return &newset->hash_item;
  2951. }
  2952. #ifdef __STDC__
  2953. static struct rx_hash *
  2954. super_hash_allocator (struct rx_hash_rules * rules)
  2955. #else
  2956. static struct rx_hash *
  2957. super_hash_allocator (rules)
  2958. struct rx_hash_rules * rules;
  2959. #endif
  2960. {
  2961. struct rx_cache * cache
  2962. = ((struct rx_cache *)
  2963. ((char *)rules
  2964. - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
  2965. return ((struct rx_hash *)
  2966. rx_cache_malloc_or_get (cache,
  2967. &cache->free_hash, sizeof (struct rx_hash)));
  2968. }
  2969. #ifdef __STDC__
  2970. static void
  2971. super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
  2972. #else
  2973. static void
  2974. super_hash_liberator (hash, rules)
  2975. struct rx_hash * hash;
  2976. struct rx_hash_rules * rules;
  2977. #endif
  2978. {
  2979. struct rx_cache * cache
  2980. = ((struct rx_cache *)
  2981. (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
  2982. rx_cache_free (cache, &cache->free_hash, (char *)hash);
  2983. }
  /* Hash-item liberator for the superset table.  Intentionally does
   * nothing: neither IT nor RULES is touched here.
   */
  #ifdef __STDC__
  static void
  superset_hash_item_liberator (struct rx_hash_item * it,
                                struct rx_hash_rules * rules)
  #else
  static void
  superset_hash_item_liberator (it, rules)
       struct rx_hash_item * it;
       struct rx_hash_rules * rules;
  #endif
  {
    /* Nothing to release. */
  }
  /* Limit consulted by rx_morecore: it stops adding memory once
   * rx_default_cache_got reaches this value, and it also copies this
   * value into the cache's superstates_allowed.
   */
  int rx_cache_bound = 128;

  /* Running total maintained by rx_morecore, which adds 16 on each call
   * that gets past the rx_cache_bound check.
   */
  static int rx_default_cache_got = 0;
  2998. #ifdef __STDC__
  2999. static int
  3000. bytes_for_cache_size (int supers, int cset_size)
  3001. #else
  3002. static int
  3003. bytes_for_cache_size (supers, cset_size)
  3004. int supers;
  3005. int cset_size;
  3006. #endif
  3007. {
  3008. /* What the hell is this? !!!*/
  3009. return (int)
  3010. ((float)supers *
  3011. ( (1.03 * (float) ( rx_sizeof_bitset (cset_size)
  3012. + sizeof (struct rx_super_edge)))
  3013. + (1.80 * (float) sizeof (struct rx_possible_future))
  3014. + (float) ( sizeof (struct rx_superstate)
  3015. + cset_size * sizeof (struct rx_inx))));
  3016. }
  3017. #ifdef __STDC__
  3018. static void
  3019. rx_morecore (struct rx_cache * cache)
  3020. #else
  3021. static void
  3022. rx_morecore (cache)
  3023. struct rx_cache * cache;
  3024. #endif
  3025. {
  3026. if (rx_default_cache_got >= rx_cache_bound)
  3027. return;
  3028. rx_default_cache_got += 16;
  3029. cache->superstates_allowed = rx_cache_bound;
  3030. {
  3031. struct rx_blocklist ** pos = &cache->memory;
  3032. int size = bytes_for_cache_size (16, cache->local_cset_size);
  3033. while (*pos)
  3034. pos = &(*pos)->next;
  3035. *pos = ((struct rx_blocklist *)
  3036. malloc (size + sizeof (struct rx_blocklist)));
  3037. if (!*pos)
  3038. return;
  3039. (*pos)->next = 0;
  3040. (*pos)->bytes = size;
  3041. cache->memory_pos = *pos;
  3042. cache->memory_addr = (char *)*pos + sizeof (**pos);
  3043. cache->bytes_left = size;
  3044. }
  3045. }
  3046. static struct rx_cache default_cache =
  3047. {
  3048. {
  3049. supersetcmp,
  3050. super_hash_allocator,
  3051. super_hash_liberator,
  3052. superset_allocator,
  3053. superset_hash_item_liberator,
  3054. },
  3055. 0,
  3056. 0,
  3057. 0,
  3058. 0,
  3059. rx_morecore,
  3060. 0,
  3061. 0,
  3062. 0,
  3063. 0,
  3064. 0,
  3065. 0,
  3066. 0,
  3067. 0,
  3068. 0,
  3069. 0,
  3070. 0,
  3071. 0,
  3072. 128,
  3073. 256,
  3074. rx_id_instruction_table,
  3075. {
  3076. 0,
  3077. 0,
  3078. {0},
  3079. {0},
  3080. {0}
  3081. }
  3082. };
  3083. /* This adds an element to a superstate set. These sets are lists, such
  3084. * that lists with == elements are ==. The empty set is returned by
  3085. * superset_cons (rx, 0, 0) and is NOT equivelent to
  3086. * (struct rx_superset)0.
  3087. */
  3088. #ifdef __STDC__
  3089. RX_DECL struct rx_superset *
  3090. rx_superset_cons (struct rx * rx,
  3091. struct rx_nfa_state *car, struct rx_superset *cdr)
  3092. #else
  3093. RX_DECL struct rx_superset *
  3094. rx_superset_cons (rx, car, cdr)
  3095. struct rx * rx;
  3096. struct rx_nfa_state *car;
  3097. struct rx_superset *cdr;
  3098. #endif
  3099. {
  3100. struct rx_cache * cache = rx->cache;
  3101. if (!car && !cdr)
  3102. {
  3103. if (!cache->empty_superset)
  3104. {
  3105. cache->empty_superset
  3106. = ((struct rx_superset *)
  3107. rx_cache_malloc_or_get (cache, &cache->free_supersets,
  3108. sizeof (struct rx_superset)));
  3109. if (!cache->empty_superset)
  3110. return 0;
  3111. bzero (cache->empty_superset, sizeof (struct rx_superset));
  3112. cache->empty_superset->refs = 1000;
  3113. }
  3114. return cache->empty_superset;
  3115. }
  3116. {
  3117. struct rx_superset template;
  3118. struct rx_hash_item * hit;
  3119. template.car = car;
  3120. template.cdr = cdr;
  3121. template.id = car->id;
  3122. /* While hash_store will protect cdr itself it might first allocate hash
  3123. tables and stuff which might cause it to be garbage collected before
  3124. it's protected -- [gsstark:19961026.2155EST] */
  3125. rx_protect_superset (rx, cdr);
  3126. hit = rx_hash_store (&cache->superset_table,
  3127. (unsigned long)car ^ car->id ^ (unsigned long)cdr,
  3128. (void *)&template,
  3129. &cache->superset_hash_rules);
  3130. rx_release_superset (rx, cdr);
  3131. return (hit
  3132. ? (struct rx_superset *)hit->data
  3133. : 0);
  3134. }
  3135. }
  3136. /* This computes a union of two NFA state sets. The sets do not have the
  3137. * same representation though. One is a RX_SUPERSET structure (part
  3138. * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
  3139. */
  3140. #ifdef __STDC__
  3141. RX_DECL struct rx_superset *
  3142. rx_superstate_eclosure_union
  3143. (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
  3144. #else
  3145. RX_DECL struct rx_superset *
  3146. rx_superstate_eclosure_union (rx, set, ecl)
  3147. struct rx * rx;
  3148. struct rx_superset *set;
  3149. struct rx_nfa_state_set *ecl;
  3150. #endif
  3151. {
  3152. if (!ecl)
  3153. return set;
  3154. if (!set->car)
  3155. return rx_superset_cons (rx, ecl->car,
  3156. rx_superstate_eclosure_union (rx, set, ecl->cdr));
  3157. if (set->car == ecl->car)
  3158. return rx_superstate_eclosure_union (rx, set, ecl->cdr);
  3159. {
  3160. struct rx_superset * tail;
  3161. struct rx_nfa_state * first;
  3162. if (set->car > ecl->car)
  3163. {
  3164. tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
  3165. first = set->car;
  3166. }
  3167. else
  3168. {
  3169. tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
  3170. first = ecl->car;
  3171. }
  3172. if (!tail)
  3173. return 0;
  3174. else
  3175. {
  3176. struct rx_superset * answer;
  3177. answer = rx_superset_cons (rx, first, tail);
  3178. if (!answer)
  3179. {
  3180. rx_protect_superset (rx, tail);
  3181. rx_release_superset (rx, tail);
  3182. return 0;
  3183. }
  3184. else
  3185. return answer;
  3186. }
  3187. }
  3188. }
  3189. /*
  3190. * This makes sure that a list of rx_distinct_futures contains
  3191. * a future for each possible set of side effects in the eclosure
  3192. * of a given state. This is some of the work of filling in a
  3193. * superstate transition.
  3194. */
  3195. #ifdef __STDC__
  3196. static struct rx_distinct_future *
  3197. include_futures (struct rx *rx,
  3198. struct rx_distinct_future *df, struct rx_nfa_state
  3199. *state, struct rx_superstate *superstate)
  3200. #else
  3201. static struct rx_distinct_future *
  3202. include_futures (rx, df, state, superstate)
  3203. struct rx *rx;
  3204. struct rx_distinct_future *df;
  3205. struct rx_nfa_state *state;
  3206. struct rx_superstate *superstate;
  3207. #endif
  3208. {
  3209. struct rx_possible_future *future;
  3210. struct rx_cache * cache = rx->cache;
  3211. for (future = state->futures; future; future = future->next)
  3212. {
  3213. struct rx_distinct_future *dfp;
  3214. struct rx_distinct_future *insert_before = 0;
  3215. if (df)
  3216. df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
  3217. for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
  3218. if (dfp->effects == future->effects)
  3219. break;
  3220. else
  3221. {
  3222. int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
  3223. if (order > 0)
  3224. {
  3225. insert_before = dfp;
  3226. dfp = 0;
  3227. break;
  3228. }
  3229. }
  3230. if (df)
  3231. df->next_same_super_edge[1]->next_same_super_edge[0] = df;
  3232. if (!dfp)
  3233. {
  3234. dfp
  3235. = ((struct rx_distinct_future *)
  3236. rx_cache_malloc_or_get (cache, &cache->free_discernable_futures,
  3237. sizeof (struct rx_distinct_future)));
  3238. if (!dfp)
  3239. return 0;
  3240. if (!df)
  3241. {
  3242. df = insert_before = dfp;
  3243. df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
  3244. }
  3245. else if (!insert_before)
  3246. insert_before = df;
  3247. else if (insert_before == df)
  3248. df = dfp;
  3249. dfp->next_same_super_edge[0] = insert_before;
  3250. dfp->next_same_super_edge[1]
  3251. = insert_before->next_same_super_edge[1];
  3252. dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
  3253. dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
  3254. dfp->next_same_dest = dfp->prev_same_dest = dfp;
  3255. dfp->future = 0;
  3256. dfp->present = superstate;
  3257. dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
  3258. dfp->future_frame.data = 0;
  3259. dfp->future_frame.data_2 = (void *) dfp;
  3260. dfp->side_effects_frame.inx
  3261. = rx->instruction_table[rx_do_side_effects];
  3262. dfp->side_effects_frame.data = 0;
  3263. dfp->side_effects_frame.data_2 = (void *) dfp;
  3264. dfp->effects = future->effects;
  3265. }
  3266. }
  3267. return df;
  3268. }
  3269. /* This constructs a new superstate from its state set. The only
  3270. * complexity here is memory management.
  3271. */
  3272. #ifdef __STDC__
  3273. RX_DECL struct rx_superstate *
  3274. rx_superstate (struct rx *rx,
  3275. struct rx_superset *set)
  3276. #else
  3277. RX_DECL struct rx_superstate *
  3278. rx_superstate (rx, set)
  3279. struct rx *rx;
  3280. struct rx_superset *set;
  3281. #endif
  3282. {
  3283. struct rx_cache * cache = rx->cache;
  3284. struct rx_superstate * superstate = 0;
  3285. /* Does the superstate already exist in the cache? */
  3286. if (set->superstate)
  3287. {
  3288. if (set->superstate->rx_id != rx->rx_id)
  3289. {
  3290. /* Aha. It is in the cache, but belongs to a superstate
  3291. * that refers to an NFA that no longer exists.
  3292. * (We know it no longer exists because it was evidently
  3293. * stored in the same region of memory as the current nfa
  3294. * yet it has a different id.)
  3295. */
  3296. superstate = set->superstate;
  3297. if (!superstate->is_semifree)
  3298. {
  3299. if (cache->lru_superstate == superstate)
  3300. {
  3301. cache->lru_superstate = superstate->next_recyclable;
  3302. if (cache->lru_superstate == superstate)
  3303. cache->lru_superstate = 0;
  3304. }
  3305. {
  3306. superstate->next_recyclable->prev_recyclable
  3307. = superstate->prev_recyclable;
  3308. superstate->prev_recyclable->next_recyclable
  3309. = superstate->next_recyclable;
  3310. if (!cache->semifree_superstate)
  3311. {
  3312. (cache->semifree_superstate
  3313. = superstate->next_recyclable
  3314. = superstate->prev_recyclable
  3315. = superstate);
  3316. }
  3317. else
  3318. {
  3319. superstate->next_recyclable = cache->semifree_superstate;
  3320. superstate->prev_recyclable
  3321. = cache->semifree_superstate->prev_recyclable;
  3322. superstate->next_recyclable->prev_recyclable
  3323. = superstate;
  3324. superstate->prev_recyclable->next_recyclable
  3325. = superstate;
  3326. cache->semifree_superstate = superstate;
  3327. }
  3328. ++cache->semifree_superstates;
  3329. }
  3330. }
  3331. set->superstate = 0;
  3332. goto handle_cache_miss;
  3333. }
  3334. ++cache->hits;
  3335. superstate = set->superstate;
  3336. rx_refresh_this_superstate (cache, superstate);
  3337. return superstate;
  3338. }
  3339. handle_cache_miss:
  3340. /* This point reached only for cache misses. */
  3341. ++cache->misses;
  3342. #if RX_DEBUG
  3343. if (rx_debug_trace > 1)
  3344. {
  3345. struct rx_superset * setp = set;
  3346. fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
  3347. while (setp)
  3348. {
  3349. fprintf (stderr, "%d ", setp->id);
  3350. setp = setp->cdr;
  3351. }
  3352. fprintf (stderr, "(%d)\n", set);
  3353. }
  3354. #endif
  3355. superstate = (struct rx_superstate *)rx_cache_get_superstate (cache);
  3356. if (!superstate)
  3357. return 0;
  3358. if (!cache->lru_superstate)
  3359. (cache->lru_superstate
  3360. = superstate->next_recyclable
  3361. = superstate->prev_recyclable
  3362. = superstate);
  3363. else
  3364. {
  3365. superstate->next_recyclable = cache->lru_superstate;
  3366. superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
  3367. ( superstate->prev_recyclable->next_recyclable
  3368. = superstate->next_recyclable->prev_recyclable
  3369. = superstate);
  3370. }
  3371. superstate->rx_id = rx->rx_id;
  3372. superstate->transition_refs = 0;
  3373. superstate->locks = 0;
  3374. superstate->is_semifree = 0;
  3375. set->superstate = superstate;
  3376. superstate->contents = set;
  3377. rx_protect_superset (rx, set);
  3378. superstate->edges = 0;
  3379. {
  3380. int x;
  3381. /* None of the transitions from this superstate are known yet. */
  3382. for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
  3383. {
  3384. struct rx_inx * ifr = &superstate->transitions[x];
  3385. ifr->inx = rx->instruction_table [rx_cache_miss];
  3386. ifr->data = ifr->data_2 = 0;
  3387. }
  3388. }
  3389. return superstate;
  3390. }
  3391. /* This computes the destination set of one edge of the superstate NFA.
  3392. * Note that a RX_DISTINCT_FUTURE is a superstate edge.
  3393. * Returns 0 on an allocation failure.
  3394. */
  3395. #ifdef __STDC__
  3396. static int
  3397. solve_destination (struct rx *rx, struct rx_distinct_future *df)
  3398. #else
  3399. static int
  3400. solve_destination (rx, df)
  3401. struct rx *rx;
  3402. struct rx_distinct_future *df;
  3403. #endif
  3404. {
  3405. struct rx_super_edge *tc = df->edge;
  3406. struct rx_superset *nfa_state;
  3407. struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
  3408. struct rx_superset *solution = nil_set;
  3409. struct rx_superstate *dest;
  3410. rx_protect_superset (rx, solution);
  3411. /* Iterate over all NFA states in the state set of this superstate. */
  3412. for (nfa_state = df->present->contents;
  3413. nfa_state->car;
  3414. nfa_state = nfa_state->cdr)
  3415. {
  3416. struct rx_nfa_edge *e;
  3417. /* Iterate over all edges of each NFA state. */
  3418. for (e = nfa_state->car->edges; e; e = e->next)
  3419. /* If we find an edge that is labeled with
  3420. * the characters we are solving for.....
  3421. */
  3422. if (rx_bitset_is_subset (rx->local_cset_size,
  3423. tc->cset, e->params.cset))
  3424. {
  3425. struct rx_nfa_state *n = e->dest;
  3426. struct rx_possible_future *pf;
  3427. /* ....search the partial epsilon closures of the destination
  3428. * of that edge for a path that involves the same set of
  3429. * side effects we are solving for.
  3430. * If we find such a RX_POSSIBLE_FUTURE, we add members to the
  3431. * stateset we are computing.
  3432. */
  3433. for (pf = n->futures; pf; pf = pf->next)
  3434. if (pf->effects == df->effects)
  3435. {
  3436. struct rx_superset * old_sol;
  3437. old_sol = solution;
  3438. solution = rx_superstate_eclosure_union (rx, solution,
  3439. pf->destset);
  3440. if (!solution)
  3441. return 0;
  3442. rx_protect_superset (rx, solution);
  3443. rx_release_superset (rx, old_sol);
  3444. }
  3445. }
  3446. }
  3447. /* It is possible that the RX_DISTINCT_FUTURE we are working on has
  3448. * the empty set of NFA states as its definition. In that case, this
  3449. * is a failure point.
  3450. */
  3451. if (solution == nil_set)
  3452. {
  3453. df->future_frame.inx = (void *) rx_backtrack;
  3454. df->future_frame.data = 0;
  3455. df->future_frame.data_2 = 0;
  3456. return 1;
  3457. }
  3458. dest = rx_superstate (rx, solution);
  3459. rx_release_superset (rx, solution);
  3460. if (!dest)
  3461. return 0;
  3462. {
  3463. struct rx_distinct_future *dft;
  3464. dft = df;
  3465. df->prev_same_dest->next_same_dest = 0;
  3466. while (dft)
  3467. {
  3468. dft->future = dest;
  3469. dft->future_frame.inx = rx->instruction_table[rx_next_char];
  3470. dft->future_frame.data = (void *) dest->transitions;
  3471. dft = dft->next_same_dest;
  3472. }
  3473. df->prev_same_dest->next_same_dest = df;
  3474. }
  3475. if (!dest->transition_refs)
  3476. dest->transition_refs = df;
  3477. else
  3478. {
  3479. struct rx_distinct_future *dft = dest->transition_refs->next_same_dest;
  3480. dest->transition_refs->next_same_dest = df->next_same_dest;
  3481. df->next_same_dest->prev_same_dest = dest->transition_refs;
  3482. df->next_same_dest = dft;
  3483. dft->prev_same_dest = df;
  3484. }
  3485. return 1;
  3486. }
  3487. /* This takes a superstate and a character, and computes some edges
  3488. * from the superstate NFA. In particular, this computes all edges
  3489. * that lead from SUPERSTATE given CHR. This function also
  3490. * computes the set of characters that share this edge set.
  3491. * This returns 0 on allocation error.
  3492. * The character set and list of edges are returned through
  3493. * the paramters CSETOUT and DFOUT.
  3494. } */
  3495. #ifdef __STDC__
  3496. static int
  3497. compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
  3498. rx_Bitset csetout, struct rx_superstate *superstate,
  3499. unsigned char chr)
  3500. #else
  3501. static int
  3502. compute_super_edge (rx, dfout, csetout, superstate, chr)
  3503. struct rx *rx;
  3504. struct rx_distinct_future **dfout;
  3505. rx_Bitset csetout;
  3506. struct rx_superstate *superstate;
  3507. unsigned char chr;
  3508. #endif
  3509. {
  3510. struct rx_superset *stateset = superstate->contents;
  3511. /* To compute the set of characters that share edges with CHR,
  3512. * we start with the full character set, and subtract.
  3513. */
  3514. rx_bitset_universe (rx->local_cset_size, csetout);
  3515. *dfout = 0;
  3516. /* Iterate over the NFA states in the superstate state-set. */
  3517. while (stateset->car)
  3518. {
  3519. struct rx_nfa_edge *e;
  3520. for (e = stateset->car->edges; e; e = e->next)
  3521. if (RX_bitset_member (e->params.cset, chr))
  3522. {
  3523. /* If we find an NFA edge that applies, we make sure there
  3524. * are corresponding edges in the superstate NFA.
  3525. */
  3526. {
  3527. struct rx_distinct_future * saved;
  3528. saved = *dfout;
  3529. *dfout = include_futures (rx, *dfout, e->dest, superstate);
  3530. if (!*dfout)
  3531. {
  3532. struct rx_distinct_future * df;
  3533. df = saved;
  3534. if (df)
  3535. df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
  3536. while (df)
  3537. {
  3538. struct rx_distinct_future *dft;
  3539. dft = df;
  3540. df = df->next_same_super_edge[0];
  3541. if (dft->future && dft->future->transition_refs == dft)
  3542. {
  3543. dft->future->transition_refs = dft->next_same_dest;
  3544. if (dft->future->transition_refs == dft)
  3545. dft->future->transition_refs = 0;
  3546. }
  3547. dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
  3548. dft->prev_same_dest->next_same_dest = dft->next_same_dest;
  3549. rx_cache_free (rx->cache,
  3550. &rx->cache->free_discernable_futures,
  3551. (char *)dft);
  3552. }
  3553. return 0;
  3554. }
  3555. }
  3556. /* We also trim the character set a bit. */
  3557. rx_bitset_intersection (rx->local_cset_size,
  3558. csetout, e->params.cset);
  3559. }
  3560. else
  3561. /* An edge that doesn't apply at least tells us some characters
  3562. * that don't share the same edge set as CHR.
  3563. */
  3564. rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
  3565. stateset = stateset->cdr;
  3566. }
  3567. return 1;
  3568. }
  3569. /* This is a constructor for RX_SUPER_EDGE structures. These are
  3570. * wrappers for lists of superstate NFA edges that share character sets labels.
  3571. * If a transition class contains more than one rx_distinct_future (superstate
  3572. * edge), then it represents a non-determinism in the superstate NFA.
  3573. */
  3574. #ifdef __STDC__
  3575. static struct rx_super_edge *
  3576. rx_super_edge (struct rx *rx,
  3577. struct rx_superstate *super, rx_Bitset cset,
  3578. struct rx_distinct_future *df)
  3579. #else
  3580. static struct rx_super_edge *
  3581. rx_super_edge (rx, super, cset, df)
  3582. struct rx *rx;
  3583. struct rx_superstate *super;
  3584. rx_Bitset cset;
  3585. struct rx_distinct_future *df;
  3586. #endif
  3587. {
  3588. struct rx_super_edge *tc =
  3589. (struct rx_super_edge *)rx_cache_malloc_or_get
  3590. (rx->cache, &rx->cache->free_transition_classes,
  3591. sizeof (struct rx_super_edge) + rx_sizeof_bitset (rx->local_cset_size));
  3592. if (!tc)
  3593. return 0;
  3594. tc->next = super->edges;
  3595. super->edges = tc;
  3596. tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
  3597. tc->rx_backtrack_frame.data = 0;
  3598. tc->rx_backtrack_frame.data_2 = (void *) tc;
  3599. tc->options = df;
  3600. tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
  3601. rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
  3602. if (df)
  3603. {
  3604. struct rx_distinct_future * dfp = df;
  3605. df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
  3606. while (dfp)
  3607. {
  3608. dfp->edge = tc;
  3609. dfp = dfp->next_same_super_edge[0];
  3610. }
  3611. df->next_same_super_edge[1]->next_same_super_edge[0] = df;
  3612. }
  3613. return tc;
  3614. }
  3615. /* There are three kinds of cache miss. The first occurs when a
  3616. * transition is taken that has never been computed during the
  3617. * lifetime of the source superstate. That cache miss is handled by
  3618. * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
  3619. * occurs when the destination superstate of a transition doesn't
  3620. * exist. SOLVE_DESTINATION is used to construct the destination superstate.
  3621. * Finally, the third kind of cache miss occurs when the destination
  3622. * superstate of a transition is in a `semi-free state'. That case is
  3623. * handled by UNFREE_SUPERSTATE.
  3624. *
  3625. * The function of HANDLE_CACHE_MISS is to figure out which of these
  3626. * cases applies.
  3627. */
  3628. #ifdef __STDC__
  3629. static void
  3630. install_partial_transition (struct rx_superstate *super,
  3631. struct rx_inx *answer,
  3632. RX_subset set, int offset)
  3633. #else
  3634. static void
  3635. install_partial_transition (super, answer, set, offset)
  3636. struct rx_superstate *super;
  3637. struct rx_inx *answer;
  3638. RX_subset set;
  3639. int offset;
  3640. #endif
  3641. {
  3642. int start = offset;
  3643. int end = start + 32;
  3644. RX_subset pos = 1;
  3645. struct rx_inx * transitions = super->transitions;
  3646. while (start < end)
  3647. {
  3648. if (set & pos)
  3649. transitions[start] = *answer;
  3650. pos <<= 1;
  3651. ++start;
  3652. }
  3653. }
  3654. #ifdef __STDC__
  3655. RX_DECL struct rx_inx *
  3656. rx_handle_cache_miss
  3657. (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
  3658. #else
  3659. RX_DECL struct rx_inx *
  3660. rx_handle_cache_miss (rx, super, chr, data)
  3661. struct rx *rx;
  3662. struct rx_superstate *super;
  3663. unsigned char chr;
  3664. void *data;
  3665. #endif
  3666. {
  3667. int offset = chr / RX_subset_bits;
  3668. struct rx_distinct_future *df = data;
  3669. if (!df) /* must be the shared_cache_miss_frame */
  3670. {
  3671. /* Perhaps this is just a transition waiting to be filled. */
  3672. struct rx_super_edge *tc;
  3673. RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
  3674. for (tc = super->edges; tc; tc = tc->next)
  3675. if (tc->cset[offset] & mask)
  3676. {
  3677. struct rx_inx * answer;
  3678. df = tc->options;
  3679. answer = ((tc->options->next_same_super_edge[0] != tc->options)
  3680. ? &tc->rx_backtrack_frame
  3681. : (df->effects
  3682. ? &df->side_effects_frame
  3683. : &df->future_frame));
  3684. install_partial_transition (super, answer,
  3685. tc->cset [offset], offset * 32);
  3686. return answer;
  3687. }
  3688. /* Otherwise, it's a flushed or newly encountered edge. */
  3689. {
  3690. char cset_space[1024]; /* this limit is far from unreasonable */
  3691. rx_Bitset trcset;
  3692. struct rx_inx *answer;
  3693. if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
  3694. return 0; /* If the arbitrary limit is hit, always fail */
  3695. /* cleanly. */
  3696. trcset = (rx_Bitset)cset_space;
  3697. rx_lock_superstate (rx, super);
  3698. if (!compute_super_edge (rx, &df, trcset, super, chr))
  3699. {
  3700. rx_unlock_superstate (rx, super);
  3701. return 0;
  3702. }
  3703. if (!df) /* We just computed the fail transition. */
  3704. {
  3705. static struct rx_inx
  3706. shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
  3707. answer = &shared_fail_frame;
  3708. }
  3709. else
  3710. {
  3711. tc = rx_super_edge (rx, super, trcset, df);
  3712. if (!tc)
  3713. {
  3714. rx_unlock_superstate (rx, super);
  3715. return 0;
  3716. }
  3717. answer = ((tc->options->next_same_super_edge[0] != tc->options)
  3718. ? &tc->rx_backtrack_frame
  3719. : (df->effects
  3720. ? &df->side_effects_frame
  3721. : &df->future_frame));
  3722. }
  3723. install_partial_transition (super, answer,
  3724. trcset[offset], offset * 32);
  3725. rx_unlock_superstate (rx, super);
  3726. return answer;
  3727. }
  3728. }
  3729. else if (df->future) /* A cache miss on an edge with a future? Must be
  3730. * a semi-free destination. */
  3731. {
  3732. if (df->future->is_semifree)
  3733. refresh_semifree_superstate (rx->cache, df->future);
  3734. return &df->future_frame;
  3735. }
  3736. else
  3737. /* no future superstate on an existing edge */
  3738. {
  3739. rx_lock_superstate (rx, super);
  3740. if (!solve_destination (rx, df))
  3741. {
  3742. rx_unlock_superstate (rx, super);
  3743. return 0;
  3744. }
  3745. if (!df->effects
  3746. && (df->edge->options->next_same_super_edge[0] == df->edge->options))
  3747. install_partial_transition (super, &df->future_frame,
  3748. df->edge->cset[offset], offset * 32);
  3749. rx_unlock_superstate (rx, super);
  3750. return &df->future_frame;
  3751. }
  3752. }
  /* The rest of the code provides a regex.c compatible interface. */

  /* Human-readable messages, one per error code; each entry is labelled
   * with the code it belongs to.  The first entry is a null pointer.
   */
  __const__ char *re_error_msg[] =
  {
    /* REG_NOERROR */   0,
    /* REG_NOMATCH */   "No match",
    /* REG_BADPAT */    "Invalid regular expression",
    /* REG_ECOLLATE */  "Invalid collation character",
    /* REG_ECTYPE */    "Invalid character class name",
    /* REG_EESCAPE */   "Trailing backslash",
    /* REG_ESUBREG */   "Invalid back reference",
    /* REG_EBRACK */    "Unmatched [ or [^",
    /* REG_EPAREN */    "Unmatched ( or \\(",
    /* REG_EBRACE */    "Unmatched \\{",
    /* REG_BADBR */     "Invalid content of \\{\\}",
    /* REG_ERANGE */    "Invalid range end",
    /* REG_ESPACE */    "Memory exhausted",
    /* REG_BADRPT */    "Invalid preceding regular expression",
    /* REG_EEND */      "Premature end of regular expression",
    /* REG_ESIZE */     "Regular expression too big",
    /* REG_ERPAREN */   "Unmatched ) or \\)",
  };
  /*
   * Macros used while compiling patterns.
   *
   * By convention, PEND points just past the end of the uncompiled pattern,
   * P points to the read position in the pattern.  `translate' is the name
   * of the translation table (`TRANSLATE' is the name of a macro that looks
   * things up in `translate').
   *
   * All of these expand to code that uses the caller's local variables
   * `p', `pend' and `translate' by name.
   */

  /*
   * Fetch the next character of the uncompiled pattern into C and run it
   * through `translate'.  The byte is first cast from the (possibly
   * signed) char of the user's string to unsigned char so that it can be
   * used as an array index.  Makes the enclosing function return REG_EEND
   * when the pattern is exhausted.
   */
  #define PATFETCH(c) \
    do \
      { \
        if (p == pend) \
          return REG_EEND; \
        c = (unsigned char) *p++; \
        c = translate[c]; \
      } \
    while (0)

  /*
   * Same as PATFETCH, but without the translation step.
   */
  #define PATFETCH_RAW(c) \
    do \
      { \
        if (p == pend) \
          return REG_EEND; \
        c = (unsigned char) *p++; \
      } \
    while (0)

  /* Step the read position back by one character. */
  #define PATUNFETCH p--

  /* Look D up in the translation table, indexing with an unsigned char. */
  #define TRANSLATE(d) translate[(unsigned char) (d)]
  /* Number of a register (parenthesized group). */
  typedef unsigned regnum_t;

  /* Since offsets can go either forwards or backwards, this type needs to
   * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
   */
  typedef int pattern_offset_t;

  /* One entry of the compile stack. */
  typedef struct
  {
    struct rexp_node ** top_expression;   /* was begalt */
    struct rexp_node ** last_expression;  /* was laststart */
    pattern_offset_t inner_group_offset;
    regnum_t regnum;
  } compile_stack_elt_t;

  /* The compile stack itself: an array of entries plus bookkeeping. */
  typedef struct
  {
    compile_stack_elt_t *stack;
    unsigned size;
    unsigned avail;               /* Offset of next open position. */
  } compile_stack_type;
  3822. static boolean
  3823. group_in_compile_stack (compile_stack_type, regnum_t);
  3824. static reg_errcode_t
  3825. compile_range (struct re_pattern_buffer *, rx_Bitset,
  3826. __const__ char **, __const__ char *,
  3827. unsigned char *, reg_syntax_t,
  3828. rx_Bitset, char *);
  3829. static void find_backrefs (char *, struct rexp_node *,
  3830. struct re_se_params *);
  3831. static int compute_fastset (struct re_pattern_buffer *,
  3832. struct rexp_node *);
  3833. static int is_anchored (struct rexp_node *, rx_side_effect);
  3834. static struct rexp_node
  3835. *remove_unecessary_side_effects
  3836. (struct rx *, char *,
  3837. struct rexp_node *,
  3838. struct re_se_params *);
  3839. static int pointless_if_repeated (struct rexp_node *,
  3840. struct re_se_params *);
  3841. static int registers_on_stack (struct re_pattern_buffer *,
  3842. struct rexp_node *,
  3843. int, struct re_se_params *);
  3844. static int has_any_se (struct rx *, struct rexp_node *);
  3845. static int has_non_idempotent_epsilon_path
  3846. (struct rx *, struct rexp_node *,
  3847. struct re_se_params *);
  3848. static int begins_with_complex_se (struct rx *, struct rexp_node *);
  3849. static void speed_up_alt (struct rx *, struct rexp_node *, int);
  3850. RX_DECL reg_errcode_t
  3851. rx_compile (__const__ char *, int, reg_syntax_t,
  3852. struct re_pattern_buffer *);
  3853. RX_DECL void rx_blow_up_fastmap (struct re_pattern_buffer *);
  3854. static __inline__ enum rx_get_burst_return
  3855. re_search_2_get_burst (struct rx_string_position *,
  3856. void *, int);
  3857. static __inline__ enum rx_back_check_return
  3858. re_search_2_back_check (struct rx_string_position *, int,
  3859. int, unsigned char *, void *, int);
  3860. static __inline__ int
  3861. re_search_2_fetch_char (struct rx_string_position *,
  3862. int, void *, int);
  /* Initial capacity of the compile stack. */
  #define INIT_COMPILE_STACK_SIZE 32

  /* State tests on the caller's local variable `compile_stack'. */
  #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
  #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)

  /* The next available element. */
  #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])

  /* Set the bit for character C in the caller's list `b'.  C is fully
   * parenthesized in both places so that an expression argument is cast
   * to unsigned char as a whole.
   */
  #define SET_LIST_BIT(c) \
    (b[((unsigned char) (c)) / CHARBITS] \
     |= 1 << (((unsigned char) (c)) % CHARBITS))

  /* Get the next unsigned number in the uncompiled pattern.  Uses the
   * caller's `p', `pend' and `c'; NUM is reset to 0 before the first
   * digit is accumulated if it was negative on entry.
   */
  #define GET_UNSIGNED_NUMBER(num) \
    { if (p != pend) \
        { \
          PATFETCH (c); \
          while (isdigit (c)) \
            { \
              if (num < 0) \
                 num = 0; \
              num = num * 10 + c - '0'; \
              if (p == pend) \
                 break; \
              PATFETCH (c); \
            } \
          } \
      }

  #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */

  /* True if STRING names one of the recognized character classes. */
  #define IS_CHAR_CLASS(string) \
     (!strcmp ((string), "alpha") || !strcmp ((string), "upper") \
      || !strcmp ((string), "lower") || !strcmp ((string), "digit") \
      || !strcmp ((string), "alnum") || !strcmp ((string), "xdigit") \
      || !strcmp ((string), "space") || !strcmp ((string), "print") \
      || !strcmp ((string), "punct") || !strcmp ((string), "graph") \
      || !strcmp ((string), "cntrl") || !strcmp ((string), "blank"))
  3896. /* These predicates are used in regex_compile. */
  3897. /* P points to just after a ^ in PATTERN. Return true if that ^ comes
  3898. * after an alternative or a begin-subexpression. We assume there is at
  3899. * least one character before the ^.
  3900. */
  3901. #ifdef __STDC__
  3902. static boolean
  3903. at_begline_loc_p (__const__ char *pattern, __const__ char * p, reg_syntax_t syntax)
  3904. #else
  3905. static boolean
  3906. at_begline_loc_p (pattern, p, syntax)
  3907. __const__ char *pattern;
  3908. __const__ char * p;
  3909. reg_syntax_t syntax;
  3910. #endif
  3911. {
  3912. __const__ char *prev = p - 2;
  3913. boolean prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\'));
  3914. return
  3915. (/* After a subexpression? */
  3916. ((*prev == '(') && ((syntax & RE_NO_BK_PARENS) || prev_prev_backslash))
  3917. ||
  3918. /* After an alternative? */
  3919. ((*prev == '|') && ((syntax & RE_NO_BK_VBAR) || prev_prev_backslash))
  3920. );
  3921. }
  3922. /* The dual of at_begline_loc_p. This one is for $. We assume there is
  3923. * at least one character after the $, i.e., `P < PEND'.
  3924. */
  3925. #ifdef __STDC__
  3926. static boolean
  3927. at_endline_loc_p (__const__ char *p, __const__ char *pend, int syntax)
  3928. #else
  3929. static boolean
  3930. at_endline_loc_p (p, pend, syntax)
  3931. __const__ char *p;
  3932. __const__ char *pend;
  3933. int syntax;
  3934. #endif
  3935. {
  3936. __const__ char *next = p;
  3937. boolean next_backslash = (*next == '\\');
  3938. __const__ char *next_next = (p + 1 < pend) ? (p + 1) : 0;
  3939. return
  3940. (
  3941. /* Before a subexpression? */
  3942. ((syntax & RE_NO_BK_PARENS)
  3943. ? (*next == ')')
  3944. : (next_backslash && next_next && (*next_next == ')')))
  3945. ||
  3946. /* Before an alternative? */
  3947. ((syntax & RE_NO_BK_VBAR)
  3948. ? (*next == '|')
  3949. : (next_backslash && next_next && (*next_next == '|')))
  3950. );
  3951. }
  /* Identity table: entry N holds the value N, for all 256 byte values. */
  unsigned char rx_id_translation[256] =
  {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  };
  3981. /* The compiler keeps an inverted translation table.
  3982. * This looks up/inititalize elements.
  3983. * VALID is an array of booleans that validate CACHE.
  3984. */
  3985. #ifdef __STDC__
  3986. static rx_Bitset
  3987. inverse_translation (struct re_pattern_buffer * rxb,
  3988. char * valid, rx_Bitset cache,
  3989. unsigned char * translate, int c)
  3990. #else
  3991. static rx_Bitset
  3992. inverse_translation (rxb, valid, cache, translate, c)
  3993. struct re_pattern_buffer * rxb;
  3994. char * valid;
  3995. rx_Bitset cache;
  3996. unsigned char * translate;
  3997. int c;
  3998. #endif
  3999. {
  4000. rx_Bitset cs
  4001. = cache + c * rx_bitset_numb_subsets (rxb->rx.local_cset_size);
  4002. if (!valid[c])
  4003. {
  4004. int x;
  4005. int c_tr = TRANSLATE(c);
  4006. rx_bitset_null (rxb->rx.local_cset_size, cs);
  4007. for (x = 0; x < 256; ++x) /* &&&& 13.37 */
  4008. if (TRANSLATE(x) == c_tr)
  4009. RX_bitset_enjoin (cs, x);
  4010. valid[c] = 1;
  4011. }
  4012. return cs;
  4013. }
  4014. /* More subroutine declarations and macros for regex_compile. */
  4015. /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
  4016. false if it's not. */
  4017. #ifdef __STDC__
  4018. static boolean
  4019. group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
  4020. #else
  4021. static boolean
  4022. group_in_compile_stack (compile_stack, regnum)
  4023. compile_stack_type compile_stack;
  4024. regnum_t regnum;
  4025. #endif
  4026. {
  4027. int this_element;
  4028. for (this_element = compile_stack.avail - 1;
  4029. this_element >= 0;
  4030. this_element--)
  4031. if (compile_stack.stack[this_element].regnum == regnum)
  4032. return true;
  4033. return false;
  4034. }
  4035. /*
  4036. * Read the ending character of a range (in a bracket expression) from the
  4037. * uncompiled pattern *P_PTR (which ends at PEND). We assume the
  4038. * starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
  4039. * Then we set the translation of all bits between the starting and
  4040. * ending characters (inclusive) in the compiled pattern B.
  4041. *
  4042. * Return an error code.
  4043. *
  4044. * We use these short variable names so we can use the same macros as
  4045. * `regex_compile' itself.
  4046. */
  4047. #ifdef __STDC__
  4048. static reg_errcode_t
  4049. compile_range (struct re_pattern_buffer * rxb, rx_Bitset cs,
  4050. __const__ char ** p_ptr, __const__ char * pend,
  4051. unsigned char * translate, reg_syntax_t syntax,
  4052. rx_Bitset inv_tr, char * valid_inv_tr)
  4053. #else
  4054. static reg_errcode_t
  4055. compile_range (rxb, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
  4056. struct re_pattern_buffer * rxb;
  4057. rx_Bitset cs;
  4058. __const__ char ** p_ptr;
  4059. __const__ char * pend;
  4060. unsigned char * translate;
  4061. reg_syntax_t syntax;
  4062. rx_Bitset inv_tr;
  4063. char * valid_inv_tr;
  4064. #endif
  4065. {
  4066. unsigned this_char;
  4067. __const__ char *p = *p_ptr;
  4068. unsigned char range_end;
  4069. unsigned char range_start = TRANSLATE(p[-2]);
  4070. if (p == pend)
  4071. return REG_ERANGE;
  4072. PATFETCH (range_end);
  4073. (*p_ptr)++;
  4074. if (range_start > range_end)
  4075. return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
  4076. for (this_char = range_start; this_char <= range_end; this_char++)
  4077. {
  4078. rx_Bitset it =
  4079. inverse_translation (rxb, valid_inv_tr, inv_tr, translate, this_char);
  4080. rx_bitset_union (rxb->rx.local_cset_size, cs, it);
  4081. }
  4082. return REG_NOERROR;
  4083. }
  4084. /* This searches a regexp for backreference side effects.
  4085. * It fills in the array OUT with 1 at the index of every register pair
  4086. * referenced by a backreference.
  4087. *
  4088. * This is used to help optimize patterns for searching. The information is
  4089. * useful because, if the caller doesn't want register values, backreferenced
  4090. * registers are the only registers for which we need rx_backtrack.
  4091. */
  4092. #ifdef __STDC__
  4093. static void
  4094. find_backrefs (char * out, struct rexp_node * rexp,
  4095. struct re_se_params * params)
  4096. #else
  4097. static void
  4098. find_backrefs (out, rexp, params)
  4099. char * out;
  4100. struct rexp_node * rexp;
  4101. struct re_se_params * params;
  4102. #endif
  4103. {
  4104. if (rexp)
  4105. switch (rexp->type)
  4106. {
  4107. case r_cset:
  4108. case r_data:
  4109. return;
  4110. case r_alternate:
  4111. case r_concat:
  4112. case r_opt:
  4113. case r_star:
  4114. case r_2phase_star:
  4115. find_backrefs (out, rexp->params.pair.left, params);
  4116. find_backrefs (out, rexp->params.pair.right, params);
  4117. return;
  4118. case r_side_effect:
  4119. if ( ((long)rexp->params.side_effect >= 0)
  4120. && (params [(long)rexp->params.side_effect].se == re_se_backref))
  4121. out[ params [(long)rexp->params.side_effect].op1] = 1;
  4122. return;
  4123. }
  4124. }
  4125. /* Returns 0 unless the pattern can match the empty string. */
  4126. #ifdef __STDC__
  4127. static int
  4128. compute_fastset (struct re_pattern_buffer * rxb, struct rexp_node * rexp)
  4129. #else
  4130. static int
  4131. compute_fastset (rxb, rexp)
  4132. struct re_pattern_buffer * rxb;
  4133. struct rexp_node * rexp;
  4134. #endif
  4135. {
  4136. if (!rexp)
  4137. return 1;
  4138. switch (rexp->type)
  4139. {
  4140. case r_data:
  4141. return 1;
  4142. case r_cset:
  4143. {
  4144. rx_bitset_union (rxb->rx.local_cset_size,
  4145. rxb->fastset, rexp->params.cset);
  4146. }
  4147. return 0;
  4148. case r_concat:
  4149. return (compute_fastset (rxb, rexp->params.pair.left)
  4150. && compute_fastset (rxb, rexp->params.pair.right));
  4151. case r_2phase_star:
  4152. compute_fastset (rxb, rexp->params.pair.left);
  4153. /* compute_fastset (rxb, rexp->params.pair.right); nope... */
  4154. return 1;
  4155. case r_alternate:
  4156. return !!(compute_fastset (rxb, rexp->params.pair.left)
  4157. + compute_fastset (rxb, rexp->params.pair.right));
  4158. case r_opt:
  4159. case r_star:
  4160. compute_fastset (rxb, rexp->params.pair.left);
  4161. return 1;
  4162. case r_side_effect:
  4163. return 1;
  4164. }
  4165. /* this should never happen */
  4166. return 0;
  4167. }
  4168. /* returns
  4169. * 1 -- yes, definately anchored by the given side effect.
  4170. * 2 -- maybe anchored, maybe the empty string.
  4171. * 0 -- definately not anchored
  4172. * There is simply no other possibility.
  4173. */
  4174. #ifdef __STDC__
  4175. static int
  4176. is_anchored (struct rexp_node * rexp, rx_side_effect se)
  4177. #else
  4178. static int
  4179. is_anchored (rexp, se)
  4180. struct rexp_node * rexp;
  4181. rx_side_effect se;
  4182. #endif
  4183. {
  4184. if (!rexp)
  4185. return 2;
  4186. switch (rexp->type)
  4187. {
  4188. case r_cset:
  4189. case r_data:
  4190. return 0;
  4191. case r_concat:
  4192. case r_2phase_star:
  4193. {
  4194. int l = is_anchored (rexp->params.pair.left, se);
  4195. return (l == 2 ? is_anchored (rexp->params.pair.right, se) : l);
  4196. }
  4197. case r_alternate:
  4198. {
  4199. int l = is_anchored (rexp->params.pair.left, se);
  4200. int r = l ? is_anchored (rexp->params.pair.right, se) : 0;
  4201. if (l == r)
  4202. return l;
  4203. else if ((l == 0) || (r == 0))
  4204. return 0;
  4205. else
  4206. return 2;
  4207. }
  4208. case r_opt:
  4209. case r_star:
  4210. return is_anchored (rexp->params.pair.left, se) ? 2 : 0;
  4211. case r_side_effect:
  4212. return ((rexp->params.side_effect == se)
  4213. ? 1 : 2);
  4214. }
  4215. /* this should never happen */
  4216. return 0;
  4217. }
  4218. /* This removes register assignments that aren't required by backreferencing.
  4219. * This can speed up explore_future, especially if it eliminates
  4220. * non-determinism in the superstate NFA.
  4221. *
  4222. * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS.
  4223. * The non-zero elements of the array indicate which register assignments
  4224. * can NOT be removed from the expression.
  4225. */
  4226. #ifdef __STDC__
  4227. static struct rexp_node *
  4228. remove_unecessary_side_effects (struct rx * rx, char * needed,
  4229. struct rexp_node * rexp,
  4230. struct re_se_params * params)
  4231. #else
  4232. static struct rexp_node *
  4233. remove_unecessary_side_effects (rx, needed, rexp, params)
  4234. struct rx * rx;
  4235. char * needed;
  4236. struct rexp_node * rexp;
  4237. struct re_se_params * params;
  4238. #endif
  4239. {
  4240. struct rexp_node * l;
  4241. struct rexp_node * r;
  4242. if (!rexp)
  4243. return 0;
  4244. else
  4245. switch (rexp->type)
  4246. {
  4247. case r_cset:
  4248. case r_data:
  4249. return rexp;
  4250. case r_alternate:
  4251. case r_concat:
  4252. case r_2phase_star:
  4253. l = remove_unecessary_side_effects (rx, needed,
  4254. rexp->params.pair.left, params);
  4255. r = remove_unecessary_side_effects (rx, needed,
  4256. rexp->params.pair.right, params);
  4257. if ((l && r) || (rexp->type != r_concat))
  4258. {
  4259. rexp->params.pair.left = l;
  4260. rexp->params.pair.right = r;
  4261. return rexp;
  4262. }
  4263. else
  4264. {
  4265. rexp->params.pair.left = rexp->params.pair.right = 0;
  4266. rx_free_rexp (rx, rexp);
  4267. return l ? l : r;
  4268. }
  4269. case r_opt:
  4270. case r_star:
  4271. l = remove_unecessary_side_effects (rx, needed,
  4272. rexp->params.pair.left, params);
  4273. if (l)
  4274. {
  4275. rexp->params.pair.left = l;
  4276. return rexp;
  4277. }
  4278. else
  4279. {
  4280. rexp->params.pair.left = 0;
  4281. rx_free_rexp (rx, rexp);
  4282. return 0;
  4283. }
  4284. case r_side_effect:
  4285. {
  4286. int se = (long)rexp->params.side_effect;
  4287. if ( (se >= 0)
  4288. && ( ((enum re_side_effects)params[se].se == re_se_lparen)
  4289. || ((enum re_side_effects)params[se].se == re_se_rparen))
  4290. && (params [se].op1 > 0)
  4291. && (!needed [params [se].op1]))
  4292. {
  4293. rx_free_rexp (rx, rexp);
  4294. return 0;
  4295. }
  4296. else
  4297. return rexp;
  4298. }
  4299. }
  4300. /* this should never happen */
  4301. return 0;
  4302. }
  4303. #ifdef __STDC__
  4304. static int
  4305. pointless_if_repeated (struct rexp_node * node, struct re_se_params * params)
  4306. #else
  4307. static int
  4308. pointless_if_repeated (node, params)
  4309. struct rexp_node * node;
  4310. struct re_se_params * params;
  4311. #endif
  4312. {
  4313. if (!node)
  4314. return 1;
  4315. switch (node->type)
  4316. {
  4317. case r_cset:
  4318. return 0;
  4319. case r_alternate:
  4320. case r_concat:
  4321. case r_2phase_star:
  4322. return (pointless_if_repeated (node->params.pair.left, params)
  4323. && pointless_if_repeated (node->params.pair.right, params));
  4324. case r_opt:
  4325. case r_star:
  4326. return pointless_if_repeated (node->params.pair.left, params);
  4327. case r_side_effect:
  4328. switch (((long)node->params.side_effect < 0)
  4329. ? (enum re_side_effects)node->params.side_effect
  4330. : (enum re_side_effects)params[(long)node->params.side_effect].se)
  4331. {
  4332. case re_se_try:
  4333. case re_se_at_dot:
  4334. case re_se_begbuf:
  4335. case re_se_hat:
  4336. case re_se_wordbeg:
  4337. case re_se_wordbound:
  4338. case re_se_notwordbound:
  4339. case re_se_wordend:
  4340. case re_se_endbuf:
  4341. case re_se_dollar:
  4342. case re_se_fail:
  4343. case re_se_win:
  4344. return 1;
  4345. case re_se_lparen:
  4346. case re_se_rparen:
  4347. case re_se_iter:
  4348. case re_se_end_iter:
  4349. case re_se_syntax:
  4350. case re_se_not_syntax:
  4351. case re_se_backref:
  4352. return 0;
  4353. }
  4354. case r_data:
  4355. default:
  4356. return 0;
  4357. }
  4358. }
  4359. #ifdef __STDC__
  4360. static int
  4361. registers_on_stack (struct re_pattern_buffer * rxb,
  4362. struct rexp_node * rexp, int in_danger,
  4363. struct re_se_params * params)
  4364. #else
  4365. static int
  4366. registers_on_stack (rxb, rexp, in_danger, params)
  4367. struct re_pattern_buffer * rxb;
  4368. struct rexp_node * rexp;
  4369. int in_danger;
  4370. struct re_se_params * params;
  4371. #endif
  4372. {
  4373. if (!rexp)
  4374. return 0;
  4375. else
  4376. switch (rexp->type)
  4377. {
  4378. case r_cset:
  4379. case r_data:
  4380. return 0;
  4381. case r_alternate:
  4382. case r_concat:
  4383. return ( registers_on_stack (rxb, rexp->params.pair.left,
  4384. in_danger, params)
  4385. || (registers_on_stack
  4386. (rxb, rexp->params.pair.right,
  4387. in_danger, params)));
  4388. case r_opt:
  4389. return registers_on_stack (rxb, rexp->params.pair.left, 0, params);
  4390. case r_star:
  4391. return registers_on_stack (rxb, rexp->params.pair.left, 1, params);
  4392. case r_2phase_star:
  4393. return
  4394. ( registers_on_stack (rxb, rexp->params.pair.left, 1, params)
  4395. || registers_on_stack (rxb, rexp->params.pair.right, 1, params));
  4396. case r_side_effect:
  4397. {
  4398. int se = (long)rexp->params.side_effect;
  4399. if ( in_danger
  4400. && (se >= 0)
  4401. && (params [se].op1 > 0)
  4402. && ( ((enum re_side_effects)params[se].se == re_se_lparen)
  4403. || ((enum re_side_effects)params[se].se == re_se_rparen)))
  4404. return 1;
  4405. else
  4406. return 0;
  4407. }
  4408. }
  4409. /* this should never happen */
  4410. return 0;
  4411. }
  4412. static char idempotent_complex_se[] =
  4413. {
  4414. #define RX_WANT_SE_DEFS 1
  4415. #undef RX_DEF_SE
  4416. #undef RX_DEF_CPLX_SE
  4417. #define RX_DEF_SE(IDEM, NAME, VALUE)
  4418. #define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) IDEM,
  4419. #include <regex.h>
  4420. #undef RX_DEF_SE
  4421. #undef RX_DEF_CPLX_SE
  4422. #undef RX_WANT_SE_DEFS
  4423. 23
  4424. };
  4425. static char idempotent_se[] =
  4426. {
  4427. 13,
  4428. #define RX_WANT_SE_DEFS 1
  4429. #undef RX_DEF_SE
  4430. #undef RX_DEF_CPLX_SE
  4431. #define RX_DEF_SE(IDEM, NAME, VALUE) IDEM,
  4432. #define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)
  4433. #include <regex.h>
  4434. #undef RX_DEF_SE
  4435. #undef RX_DEF_CPLX_SE
  4436. #undef RX_WANT_SE_DEFS
  4437. 42
  4438. };
  4439. #ifdef __STDC__
  4440. static int
  4441. has_any_se (struct rx * rx,
  4442. struct rexp_node * rexp)
  4443. #else
  4444. static int
  4445. has_any_se (rx, rexp)
  4446. struct rx * rx;
  4447. struct rexp_node * rexp;
  4448. #endif
  4449. {
  4450. if (!rexp)
  4451. return 0;
  4452. switch (rexp->type)
  4453. {
  4454. case r_cset:
  4455. case r_data:
  4456. return 0;
  4457. case r_side_effect:
  4458. return 1;
  4459. case r_2phase_star:
  4460. case r_concat:
  4461. case r_alternate:
  4462. return
  4463. ( has_any_se (rx, rexp->params.pair.left)
  4464. || has_any_se (rx, rexp->params.pair.right));
  4465. case r_opt:
  4466. case r_star:
  4467. return has_any_se (rx, rexp->params.pair.left);
  4468. }
  4469. /* this should never happen */
  4470. return 0;
  4471. }
  4472. /* This must be called AFTER `convert_hard_loops' for a given REXP. */
  4473. #ifdef __STDC__
  4474. static int
  4475. has_non_idempotent_epsilon_path (struct rx * rx,
  4476. struct rexp_node * rexp,
  4477. struct re_se_params * params)
  4478. #else
  4479. static int
  4480. has_non_idempotent_epsilon_path (rx, rexp, params)
  4481. struct rx * rx;
  4482. struct rexp_node * rexp;
  4483. struct re_se_params * params;
  4484. #endif
  4485. {
  4486. if (!rexp)
  4487. return 0;
  4488. switch (rexp->type)
  4489. {
  4490. case r_cset:
  4491. case r_data:
  4492. case r_star:
  4493. return 0;
  4494. case r_side_effect:
  4495. return
  4496. !((long)rexp->params.side_effect > 0
  4497. ? idempotent_complex_se [ params [(long)rexp->params.side_effect].se ]
  4498. : idempotent_se [-(long)rexp->params.side_effect]);
  4499. case r_alternate:
  4500. return
  4501. ( has_non_idempotent_epsilon_path (rx,
  4502. rexp->params.pair.left, params)
  4503. || has_non_idempotent_epsilon_path (rx,
  4504. rexp->params.pair.right, params));
  4505. case r_2phase_star:
  4506. case r_concat:
  4507. return
  4508. ( has_non_idempotent_epsilon_path (rx,
  4509. rexp->params.pair.left, params)
  4510. && has_non_idempotent_epsilon_path (rx,
  4511. rexp->params.pair.right, params));
  4512. case r_opt:
  4513. return has_non_idempotent_epsilon_path (rx,
  4514. rexp->params.pair.left, params);
  4515. }
  4516. /* this should never happen */
  4517. return 0;
  4518. }
  4519. /* This computes rougly what it's name suggests. It can (and does) go wrong
  4520. * in the direction of returning spurious 0 without causing disasters.
  4521. */
  4522. #ifdef __STDC__
  4523. static int
  4524. begins_with_complex_se (struct rx * rx, struct rexp_node * rexp)
  4525. #else
  4526. static int
  4527. begins_with_complex_se (rx, rexp)
  4528. struct rx * rx;
  4529. struct rexp_node * rexp;
  4530. #endif
  4531. {
  4532. if (!rexp)
  4533. return 0;
  4534. switch (rexp->type)
  4535. {
  4536. case r_cset:
  4537. case r_data:
  4538. return 0;
  4539. case r_side_effect:
  4540. return ((long)rexp->params.side_effect >= 0);
  4541. case r_alternate:
  4542. return
  4543. ( begins_with_complex_se (rx, rexp->params.pair.left)
  4544. && begins_with_complex_se (rx, rexp->params.pair.right));
  4545. case r_concat:
  4546. return has_any_se (rx, rexp->params.pair.left);
  4547. case r_opt:
  4548. case r_star:
  4549. case r_2phase_star:
  4550. return 0;
  4551. }
  4552. /* this should never happen */
  4553. return 0;
  4554. }
  4555. /* This destructively removes some of the re_se_tv side effects from
  4556. * a rexp tree. In particular, during parsing re_se_tv was inserted on the
  4557. * right half of every | to guarantee that posix path preference could be
  4558. * honored. This function removes some which it can be determined aren't
  4559. * needed.
  4560. */
  4561. #ifdef __STDC__
  4562. static void
  4563. speed_up_alt (struct rx * rx,
  4564. struct rexp_node * rexp,
  4565. int unposix)
  4566. #else
  4567. static void
  4568. speed_up_alt (rx, rexp, unposix)
  4569. struct rx * rx;
  4570. struct rexp_node * rexp;
  4571. int unposix;
  4572. #endif
  4573. {
  4574. if (!rexp)
  4575. return;
  4576. switch (rexp->type)
  4577. {
  4578. case r_cset:
  4579. case r_data:
  4580. case r_side_effect:
  4581. return;
  4582. case r_opt:
  4583. case r_star:
  4584. speed_up_alt (rx, rexp->params.pair.left, unposix);
  4585. return;
  4586. case r_2phase_star:
  4587. case r_concat:
  4588. speed_up_alt (rx, rexp->params.pair.left, unposix);
  4589. speed_up_alt (rx, rexp->params.pair.right, unposix);
  4590. return;
  4591. case r_alternate:
  4592. /* the right child is guaranteed to be (concat re_se_tv <subexp>) */
  4593. speed_up_alt (rx, rexp->params.pair.left, unposix);
  4594. speed_up_alt (rx, rexp->params.pair.right->params.pair.right, unposix);
  4595. if ( unposix
  4596. || (begins_with_complex_se
  4597. (rx, rexp->params.pair.right->params.pair.right))
  4598. || !( has_any_se (rx, rexp->params.pair.right->params.pair.right)
  4599. || has_any_se (rx, rexp->params.pair.left)))
  4600. {
  4601. struct rexp_node * conc = rexp->params.pair.right;
  4602. rexp->params.pair.right = conc->params.pair.right;
  4603. conc->params.pair.right = 0;
  4604. rx_free_rexp (rx, conc);
  4605. }
  4606. }
  4607. }
  /* `rx_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
     Returns one of the error codes defined in `regex.h', or zero for success.
     Assumes the `allocated' (and perhaps `buffer') and `translate'
     fields are set in RXB on entry.
     If it succeeds, results are put in RXB (if it returns an error, the
     contents of RXB are undefined):
       `buffer' is the compiled pattern;
       `syntax' is set to SYNTAX;
       `used' is set to the length of the compiled pattern;
       `fastmap_accurate' is set to zero;
       `re_nsub' is set to the number of groups in PATTERN;
       `not_bol' and `not_eol' are set to zero.
     The `fastmap' and `newline_anchor' fields are neither
     examined nor set.  */
  4622. #ifdef __STDC__
  4623. RX_DECL reg_errcode_t
  4624. rx_compile (__const__ char *pattern, int size,
  4625. reg_syntax_t syntax,
  4626. struct re_pattern_buffer * rxb)
  4627. #else
  4628. RX_DECL reg_errcode_t
  4629. rx_compile (pattern, size, syntax, rxb)
  4630. __const__ char *pattern;
  4631. int size;
  4632. reg_syntax_t syntax;
  4633. struct re_pattern_buffer * rxb;
  4634. #endif
  4635. {
  4636. RX_subset
  4637. inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
  4638. char
  4639. validate_inv_tr [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
  4640. /* We fetch characters from PATTERN here. Even though PATTERN is
  4641. `char *' (i.e., signed), we declare these variables as unsigned, so
  4642. they can be reliably used as array indices. */
  4643. register unsigned char c, c1;
  4644. /* A random tempory spot in PATTERN. */
  4645. __const__ char *p1;
  4646. /* Keeps track of unclosed groups. */
  4647. compile_stack_type compile_stack;
  4648. /* Points to the current (ending) position in the pattern. */
  4649. __const__ char *p = pattern;
  4650. __const__ char *pend = pattern + size;
  4651. /* How to translate the characters in the pattern. */
  4652. unsigned char *translate = (rxb->translate
  4653. ? rxb->translate
  4654. : rx_id_translation);
  4655. /* When parsing is done, this will hold the expression tree. */
  4656. struct rexp_node * rexp = 0;
  4657. /* In the midst of compilation, this holds onto the regexp
  4658. * first parst while rexp goes on to aquire additional constructs.
  4659. */
  4660. struct rexp_node * orig_rexp = 0;
  4661. struct rexp_node * fewer_side_effects = 0;
  4662. /* This and top_expression are saved on the compile stack. */
  4663. struct rexp_node ** top_expression = &rexp;
  4664. struct rexp_node ** last_expression = top_expression;
  4665. /* Parameter to `goto append_node' */
  4666. struct rexp_node * append;
  4667. /* Counts open-groups as they are encountered. This is the index of the
  4668. * innermost group being compiled.
  4669. */
  4670. regnum_t regnum = 0;
  4671. /* Place in the uncompiled pattern (i.e., the {) to
  4672. * which to go back if the interval is invalid.
  4673. */
  4674. __const__ char *beg_interval;
  4675. struct re_se_params * params = 0;
  4676. int paramc = 0; /* How many complex side effects so far? */
  4677. rx_side_effect side; /* param to `goto add_side_effect' */
  4678. bzero (validate_inv_tr, sizeof (validate_inv_tr));
  4679. rxb->rx.instruction_table = rx_id_instruction_table;
  4680. /* Initialize the compile stack. */
  4681. compile_stack.stack = (( compile_stack_elt_t *) malloc ((INIT_COMPILE_STACK_SIZE) * sizeof ( compile_stack_elt_t)));
  4682. if (compile_stack.stack == 0)
  4683. return REG_ESPACE;
  4684. compile_stack.size = INIT_COMPILE_STACK_SIZE;
  4685. compile_stack.avail = 0;
  4686. /* Initialize the pattern buffer. */
  4687. rxb->rx.cache = &default_cache;
  4688. rxb->syntax = syntax;
  4689. rxb->fastmap_accurate = 0;
  4690. rxb->not_bol = rxb->not_eol = 0;
  4691. rxb->least_subs = 0;
  4692. /* Always count groups, whether or not rxb->no_sub is set.
  4693. * The whole pattern is implicitly group 0, so counting begins
  4694. * with 1.
  4695. */
  4696. rxb->re_nsub = 0;
  4697. #if !defined (emacs) && !defined (SYNTAX_TABLE)
  4698. /* Initialize the syntax table. */
  4699. init_syntax_once ();
  4700. #endif
  4701. /* Loop through the uncompiled pattern until we're at the end. */
  4702. while (p != pend)
  4703. {
  4704. PATFETCH (c);
  4705. switch (c)
  4706. {
  4707. case '^':
  4708. {
  4709. if ( /* If at start of pattern, it's an operator. */
  4710. p == pattern + 1
  4711. /* If context independent, it's an operator. */
  4712. || syntax & RE_CONTEXT_INDEP_ANCHORS
  4713. /* Otherwise, depends on what's come before. */
  4714. || at_begline_loc_p (pattern, p, syntax))
  4715. {
  4716. struct rexp_node * n
  4717. = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_hat);
  4718. if (!n)
  4719. return REG_ESPACE;
  4720. append = n;
  4721. goto append_node;
  4722. }
  4723. else
  4724. goto normal_char;
  4725. }
  4726. break;
  4727. case '$':
  4728. {
  4729. if ( /* If at end of pattern, it's an operator. */
  4730. p == pend
  4731. /* If context independent, it's an operator. */
  4732. || syntax & RE_CONTEXT_INDEP_ANCHORS
  4733. /* Otherwise, depends on what's next. */
  4734. || at_endline_loc_p (p, pend, syntax))
  4735. {
  4736. struct rexp_node * n
  4737. = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_dollar);
  4738. if (!n)
  4739. return REG_ESPACE;
  4740. append = n;
  4741. goto append_node;
  4742. }
  4743. else
  4744. goto normal_char;
  4745. }
  4746. break;
  4747. case '+':
  4748. case '?':
  4749. if ((syntax & RE_BK_PLUS_QM)
  4750. || (syntax & RE_LIMITED_OPS))
  4751. goto normal_char;
  4752. handle_plus:
  4753. case '*':
  4754. /* If there is no previous pattern... */
  4755. if (pointless_if_repeated (*last_expression, params))
  4756. {
  4757. if (syntax & RE_CONTEXT_INVALID_OPS)
  4758. return REG_BADRPT;
  4759. else if (!(syntax & RE_CONTEXT_INDEP_OPS))
  4760. goto normal_char;
  4761. }
  4762. {
  4763. /* 1 means zero (many) matches is allowed. */
  4764. char zero_times_ok = 0, many_times_ok = 0;
  4765. /* If there is a sequence of repetition chars, collapse it
  4766. down to just one (the right one). We can't combine
  4767. interval operators with these because of, e.g., `a{2}*',
  4768. which should only match an even number of `a's. */
  4769. for (;;)
  4770. {
  4771. zero_times_ok |= c != '+';
  4772. many_times_ok |= c != '?';
  4773. if (p == pend)
  4774. break;
  4775. PATFETCH (c);
  4776. if (c == '*'
  4777. || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
  4778. ;
  4779. else if (syntax & RE_BK_PLUS_QM && c == '\\')
  4780. {
  4781. if (p == pend) return REG_EESCAPE;
  4782. PATFETCH (c1);
  4783. if (!(c1 == '+' || c1 == '?'))
  4784. {
  4785. PATUNFETCH;
  4786. PATUNFETCH;
  4787. break;
  4788. }
  4789. c = c1;
  4790. }
  4791. else
  4792. {
  4793. PATUNFETCH;
  4794. break;
  4795. }
  4796. /* If we get here, we found another repeat character. */
  4797. }
  4798. /* Star, etc. applied to an empty pattern is equivalent
  4799. to an empty pattern. */
  4800. if (!last_expression)
  4801. break;
  4802. /* Now we know whether or not zero matches is allowed
  4803. * and also whether or not two or more matches is allowed.
  4804. */
  4805. {
  4806. struct rexp_node * inner_exp = *last_expression;
  4807. int need_sync = 0;
  4808. if (many_times_ok
  4809. && has_non_idempotent_epsilon_path (&rxb->rx,
  4810. inner_exp, params))
  4811. {
  4812. struct rexp_node * pusher
  4813. = rx_mk_r_side_effect (&rxb->rx,
  4814. (rx_side_effect)re_se_pushpos);
  4815. struct rexp_node * checker
  4816. = rx_mk_r_side_effect (&rxb->rx,
  4817. (rx_side_effect)re_se_chkpos);
  4818. struct rexp_node * pushback
  4819. = rx_mk_r_side_effect (&rxb->rx,
  4820. (rx_side_effect)re_se_pushback);
  4821. rx_Bitset cs = rx_cset (&rxb->rx);
  4822. struct rexp_node * lit_t;
  4823. struct rexp_node * fake_state;
  4824. struct rexp_node * phase2;
  4825. struct rexp_node * popper;
  4826. struct rexp_node * star;
  4827. struct rexp_node * a;
  4828. struct rexp_node * whole_thing;
  4829. if (! cs)
  4830. return REG_ESPACE;
  4831. lit_t = rx_mk_r_cset (&rxb->rx, cs);
  4832. fake_state = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
  4833. phase2 = rx_mk_r_concat (&rxb->rx, checker, fake_state);
  4834. popper = rx_mk_r_side_effect (&rxb->rx,
  4835. (rx_side_effect)re_se_poppos);
  4836. star = rx_mk_r_2phase_star (&rxb->rx, inner_exp, phase2);
  4837. a = rx_mk_r_concat (&rxb->rx, pusher, star);
  4838. whole_thing = rx_mk_r_concat (&rxb->rx, a, popper);
  4839. if (!(pusher && star && pushback && lit_t && fake_state
  4840. && lit_t && phase2 && checker && popper
  4841. && a && whole_thing))
  4842. return REG_ESPACE;
  4843. RX_bitset_enjoin (cs, 't');
  4844. *last_expression = whole_thing;
  4845. }
  4846. else
  4847. {
  4848. struct rexp_node * star =
  4849. (many_times_ok ? rx_mk_r_star : rx_mk_r_opt)
  4850. (&rxb->rx, *last_expression);
  4851. if (!star)
  4852. return REG_ESPACE;
  4853. *last_expression = star;
  4854. need_sync = has_any_se (&rxb->rx, *last_expression);
  4855. }
  4856. if (!zero_times_ok)
  4857. {
  4858. struct rexp_node * concat
  4859. = rx_mk_r_concat (&rxb->rx, inner_exp,
  4860. rx_copy_rexp (&rxb->rx,
  4861. *last_expression));
  4862. if (!concat)
  4863. return REG_ESPACE;
  4864. *last_expression = concat;
  4865. }
  4866. if (need_sync)
  4867. {
  4868. int sync_se = paramc;
  4869. params = (params
  4870. ? ((struct re_se_params *)
  4871. realloc (params,
  4872. sizeof (*params) * (1 + paramc)))
  4873. : ((struct re_se_params *)
  4874. malloc (sizeof (*params))));
  4875. if (!params)
  4876. return REG_ESPACE;
  4877. ++paramc;
  4878. params [sync_se].se = re_se_tv;
  4879. side = (rx_side_effect)sync_se;
  4880. goto add_side_effect;
  4881. }
  4882. }
  4883. /* The old regex.c used to optimize `.*\n'.
  4884. * Maybe rx should too?
  4885. */
  4886. }
  4887. break;
  4888. case '.':
  4889. {
  4890. rx_Bitset cs = rx_cset (&rxb->rx);
  4891. struct rexp_node * n = rx_mk_r_cset (&rxb->rx, cs);
  4892. if (!(cs && n))
  4893. return REG_ESPACE;
  4894. rx_bitset_universe (rxb->rx.local_cset_size, cs);
  4895. if (!(rxb->syntax & RE_DOT_NEWLINE))
  4896. RX_bitset_remove (cs, '\n');
  4897. if (!(rxb->syntax & RE_DOT_NOT_NULL))
  4898. RX_bitset_remove (cs, 0);
  4899. append = n;
  4900. goto append_node;
  4901. break;
  4902. }
  4903. case '[':
  4904. if (p == pend) return REG_EBRACK;
  4905. {
  4906. boolean had_char_class = false;
  4907. rx_Bitset cs = rx_cset (&rxb->rx);
  4908. struct rexp_node * node = rx_mk_r_cset (&rxb->rx, cs);
  4909. int is_inverted = *p == '^';
  4910. if (!(node && cs))
  4911. return REG_ESPACE;
  4912. /* This branch of the switch is normally exited with
  4913. *`goto append_node'
  4914. */
  4915. append = node;
  4916. if (is_inverted)
  4917. p++;
  4918. /* Remember the first position in the bracket expression. */
  4919. p1 = p;
  4920. /* Read in characters and ranges, setting map bits. */
  4921. for (;;)
  4922. {
  4923. if (p == pend) return REG_EBRACK;
  4924. PATFETCH (c);
  4925. /* \ might escape characters inside [...] and [^...]. */
  4926. if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
  4927. {
  4928. if (p == pend) return REG_EESCAPE;
  4929. PATFETCH (c1);
  4930. {
  4931. rx_Bitset it = inverse_translation (rxb,
  4932. validate_inv_tr,
  4933. inverse_translate,
  4934. translate,
  4935. c1);
  4936. rx_bitset_union (rxb->rx.local_cset_size, cs, it);
  4937. }
  4938. continue;
  4939. }
  4940. /* Could be the end of the bracket expression. If it's
  4941. not (i.e., when the bracket expression is `[]' so
  4942. far), the ']' character bit gets set way below. */
  4943. if (c == ']' && p != p1 + 1)
  4944. goto finalize_class_and_append;
  4945. /* Look ahead to see if it's a range when the last thing
  4946. was a character class. */
  4947. if (had_char_class && c == '-' && *p != ']')
  4948. return REG_ERANGE;
  4949. /* Look ahead to see if it's a range when the last thing
  4950. was a character: if this is a hyphen not at the
  4951. beginning or the end of a list, then it's the range
  4952. operator. */
  4953. if (c == '-'
  4954. && !(p - 2 >= pattern && p[-2] == '[')
  4955. && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
  4956. && *p != ']')
  4957. {
  4958. reg_errcode_t ret
  4959. = compile_range (rxb, cs, &p, pend, translate, syntax,
  4960. inverse_translate, validate_inv_tr);
  4961. if (ret != REG_NOERROR) return ret;
  4962. }
  4963. else if (p[0] == '-' && p[1] != ']')
  4964. { /* This handles ranges made up of characters only. */
  4965. reg_errcode_t ret;
  4966. /* Move past the `-'. */
  4967. PATFETCH (c1);
  4968. ret = compile_range (rxb, cs, &p, pend, translate, syntax,
  4969. inverse_translate, validate_inv_tr);
  4970. if (ret != REG_NOERROR) return ret;
  4971. }
  4972. /* See if we're at the beginning of a possible character
  4973. class. */
  4974. else if ((syntax & RE_CHAR_CLASSES)
  4975. && (c == '[') && (*p == ':'))
  4976. {
  4977. char str[CHAR_CLASS_MAX_LENGTH + 1];
  4978. PATFETCH (c);
  4979. c1 = 0;
  4980. /* If pattern is `[[:'. */
  4981. if (p == pend) return REG_EBRACK;
  4982. for (;;)
  4983. {
  4984. PATFETCH (c);
  4985. if (c == ':' || c == ']' || p == pend
  4986. || c1 == CHAR_CLASS_MAX_LENGTH)
  4987. break;
  4988. str[c1++] = c;
  4989. }
  4990. str[c1] = '\0';
  4991. /* If isn't a word bracketed by `[:' and:`]':
  4992. undo the ending character, the letters, and leave
  4993. the leading `:' and `[' (but set bits for them). */
  4994. if (c == ':' && *p == ']')
  4995. {
  4996. int ch;
  4997. boolean is_alnum = !strcmp (str, "alnum");
  4998. boolean is_alpha = !strcmp (str, "alpha");
  4999. boolean is_blank = !strcmp (str, "blank");
  5000. boolean is_cntrl = !strcmp (str, "cntrl");
  5001. boolean is_digit = !strcmp (str, "digit");
  5002. boolean is_graph = !strcmp (str, "graph");
  5003. boolean is_lower = !strcmp (str, "lower");
  5004. boolean is_print = !strcmp (str, "print");
  5005. boolean is_punct = !strcmp (str, "punct");
  5006. boolean is_space = !strcmp (str, "space");
  5007. boolean is_upper = !strcmp (str, "upper");
  5008. boolean is_xdigit = !strcmp (str, "xdigit");
  5009. if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
  5010. /* Throw away the ] at the end of the character
  5011. class. */
  5012. PATFETCH (c);
  5013. if (p == pend) return REG_EBRACK;
  5014. for (ch = 0; ch < 1 << CHARBITS; ch++)
  5015. {
  5016. if ( (is_alnum && isalnum (ch))
  5017. || (is_alpha && isalpha (ch))
  5018. || (is_blank && isblank (ch))
  5019. || (is_cntrl && iscntrl (ch))
  5020. || (is_digit && isdigit (ch))
  5021. || (is_graph && isgraph (ch))
  5022. || (is_lower && islower (ch))
  5023. || (is_print && isprint (ch))
  5024. || (is_punct && ispunct (ch))
  5025. || (is_space && isspace (ch))
  5026. || (is_upper && isupper (ch))
  5027. || (is_xdigit && isxdigit (ch)))
  5028. {
  5029. rx_Bitset it =
  5030. inverse_translation (rxb,
  5031. validate_inv_tr,
  5032. inverse_translate,
  5033. translate,
  5034. ch);
  5035. rx_bitset_union (rxb->rx.local_cset_size,
  5036. cs, it);
  5037. }
  5038. }
  5039. had_char_class = true;
  5040. }
  5041. else
  5042. {
  5043. c1++;
  5044. while (c1--)
  5045. PATUNFETCH;
  5046. {
  5047. rx_Bitset it =
  5048. inverse_translation (rxb,
  5049. validate_inv_tr,
  5050. inverse_translate,
  5051. translate,
  5052. '[');
  5053. rx_bitset_union (rxb->rx.local_cset_size,
  5054. cs, it);
  5055. }
  5056. {
  5057. rx_Bitset it =
  5058. inverse_translation (rxb,
  5059. validate_inv_tr,
  5060. inverse_translate,
  5061. translate,
  5062. ':');
  5063. rx_bitset_union (rxb->rx.local_cset_size,
  5064. cs, it);
  5065. }
  5066. had_char_class = false;
  5067. }
  5068. }
  5069. else
  5070. {
  5071. had_char_class = false;
  5072. {
  5073. rx_Bitset it = inverse_translation (rxb,
  5074. validate_inv_tr,
  5075. inverse_translate,
  5076. translate,
  5077. c);
  5078. rx_bitset_union (rxb->rx.local_cset_size, cs, it);
  5079. }
  5080. }
  5081. }
  5082. finalize_class_and_append:
  5083. if (is_inverted)
  5084. {
  5085. rx_bitset_complement (rxb->rx.local_cset_size, cs);
  5086. if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
  5087. RX_bitset_remove (cs, '\n');
  5088. }
  5089. goto append_node;
  5090. }
  5091. break;
  5092. case '(':
  5093. if (syntax & RE_NO_BK_PARENS)
  5094. goto handle_open;
  5095. else
  5096. goto normal_char;
  5097. case ')':
  5098. if (syntax & RE_NO_BK_PARENS)
  5099. goto handle_close;
  5100. else
  5101. goto normal_char;
  5102. case '\n':
  5103. if (syntax & RE_NEWLINE_ALT)
  5104. goto handle_alt;
  5105. else
  5106. goto normal_char;
  5107. case '|':
  5108. if (syntax & RE_NO_BK_VBAR)
  5109. goto handle_alt;
  5110. else
  5111. goto normal_char;
  5112. case '{':
  5113. if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
  5114. goto handle_interval;
  5115. else
  5116. goto normal_char;
  5117. case '\\':
  5118. if (p == pend) return REG_EESCAPE;
  5119. /* Do not translate the character after the \, so that we can
  5120. distinguish, e.g., \B from \b, even if we normally would
  5121. translate, e.g., B to b. */
  5122. PATFETCH_RAW (c);
  5123. switch (c)
  5124. {
  5125. case '(':
  5126. if (syntax & RE_NO_BK_PARENS)
  5127. goto normal_backslash;
  5128. handle_open:
  5129. rxb->re_nsub++;
  5130. regnum++;
  5131. if (COMPILE_STACK_FULL)
  5132. {
  5133. ((compile_stack.stack) =
  5134. (compile_stack_elt_t *) realloc (compile_stack.stack, ( compile_stack.size << 1) * sizeof (
  5135. compile_stack_elt_t)));
  5136. if (compile_stack.stack == 0) return REG_ESPACE;
  5137. compile_stack.size <<= 1;
  5138. }
  5139. if (*last_expression)
  5140. {
  5141. struct rexp_node * concat
  5142. = rx_mk_r_concat (&rxb->rx, *last_expression, 0);
  5143. if (!concat)
  5144. return REG_ESPACE;
  5145. *last_expression = concat;
  5146. last_expression = &concat->params.pair.right;
  5147. }
  5148. /*
  5149. * These are the values to restore when we hit end of this
  5150. * group.
  5151. */
  5152. COMPILE_STACK_TOP.top_expression = top_expression;
  5153. COMPILE_STACK_TOP.last_expression = last_expression;
  5154. COMPILE_STACK_TOP.regnum = regnum;
  5155. compile_stack.avail++;
  5156. top_expression = last_expression;
  5157. break;
  5158. case ')':
  5159. if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
  5160. handle_close:
  5161. /* See similar code for backslashed left paren above. */
  5162. if (COMPILE_STACK_EMPTY) {
  5163. if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) {
  5164. goto normal_char;
  5165. } else {
  5166. return REG_ERPAREN;
  5167. }
  5168. }
  5169. /* Since we just checked for an empty stack above, this
  5170. ``can't happen''. */
  5171. {
  5172. /* We don't just want to restore into `regnum', because
  5173. later groups should continue to be numbered higher,
  5174. as in `(ab)c(de)' -- the second group is #2. */
  5175. regnum_t this_group_regnum;
  5176. struct rexp_node ** inner = top_expression;
  5177. compile_stack.avail--;
  5178. top_expression = COMPILE_STACK_TOP.top_expression;
  5179. last_expression = COMPILE_STACK_TOP.last_expression;
  5180. this_group_regnum = COMPILE_STACK_TOP.regnum;
  5181. {
  5182. int left_se = paramc;
  5183. int right_se = paramc + 1;
  5184. params = (params
  5185. ? ((struct re_se_params *)
  5186. realloc (params,
  5187. (paramc + 2) * sizeof (params[0])))
  5188. : ((struct re_se_params *)
  5189. malloc (2 * sizeof (params[0]))));
  5190. if (!params)
  5191. return REG_ESPACE;
  5192. paramc += 2;
  5193. params[left_se].se = re_se_lparen;
  5194. params[left_se].op1 = this_group_regnum;
  5195. params[right_se].se = re_se_rparen;
  5196. params[right_se].op1 = this_group_regnum;
  5197. {
  5198. struct rexp_node * left
  5199. = rx_mk_r_side_effect (&rxb->rx,
  5200. (rx_side_effect)left_se);
  5201. struct rexp_node * right
  5202. = rx_mk_r_side_effect (&rxb->rx,
  5203. (rx_side_effect)right_se);
  5204. struct rexp_node * c1
  5205. = (*inner
  5206. ? rx_mk_r_concat (&rxb->rx, left, *inner) : left);
  5207. struct rexp_node * c2
  5208. = rx_mk_r_concat (&rxb->rx, c1, right);
  5209. if (!(left && right && c1 && c2))
  5210. return REG_ESPACE;
  5211. *inner = c2;
  5212. }
  5213. }
  5214. break;
  5215. }
  5216. case '|': /* `\|'. */
  5217. if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
  5218. goto normal_backslash;
  5219. handle_alt:
  5220. if (syntax & RE_LIMITED_OPS)
  5221. goto normal_char;
  5222. {
  5223. struct rexp_node * alt
  5224. = rx_mk_r_alternate (&rxb->rx, *top_expression, 0);
  5225. if (!alt)
  5226. return REG_ESPACE;
  5227. *top_expression = alt;
  5228. last_expression = &alt->params.pair.right;
  5229. {
  5230. int sync_se = paramc;
  5231. params = (params
  5232. ? ((struct re_se_params *)
  5233. realloc (params,
  5234. (paramc + 1) * sizeof (params[0])))
  5235. : ((struct re_se_params *)
  5236. malloc (sizeof (params[0]))));
  5237. if (!params)
  5238. return REG_ESPACE;
  5239. ++paramc;
  5240. params[sync_se].se = re_se_tv;
  5241. {
  5242. struct rexp_node * sync
  5243. = rx_mk_r_side_effect (&rxb->rx,
  5244. (rx_side_effect)sync_se);
  5245. struct rexp_node * conc
  5246. = rx_mk_r_concat (&rxb->rx, sync, 0);
  5247. if (!sync || !conc)
  5248. return REG_ESPACE;
  5249. *last_expression = conc;
  5250. last_expression = &conc->params.pair.right;
  5251. }
  5252. }
  5253. }
  5254. break;
  5255. case '{':
  5256. /* If \{ is a literal. */
  5257. if (!(syntax & RE_INTERVALS)
  5258. /* If we're at `\{' and it's not the open-interval
  5259. operator. */
  5260. || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
  5261. || (p - 2 == pattern && p == pend))
  5262. goto normal_backslash;
  5263. handle_interval:
  5264. {
  5265. /* If got here, then the syntax allows intervals. */
  5266. /* At least (most) this many matches must be made. */
  5267. int lower_bound = -1, upper_bound = -1;
  5268. beg_interval = p - 1;
  5269. if (p == pend)
  5270. {
  5271. if (syntax & RE_NO_BK_BRACES)
  5272. goto unfetch_interval;
  5273. else
  5274. return REG_EBRACE;
  5275. }
  5276. GET_UNSIGNED_NUMBER (lower_bound);
  5277. if (c == ',')
  5278. {
  5279. GET_UNSIGNED_NUMBER (upper_bound);
  5280. if (upper_bound < 0) upper_bound = RE_DUP_MAX;
  5281. }
  5282. else
  5283. /* Interval such as `{1}' => match exactly once. */
  5284. upper_bound = lower_bound;
  5285. if (lower_bound < 0 || upper_bound > RE_DUP_MAX
  5286. || lower_bound > upper_bound)
  5287. {
  5288. if (syntax & RE_NO_BK_BRACES)
  5289. goto unfetch_interval;
  5290. else
  5291. return REG_BADBR;
  5292. }
  5293. if (!(syntax & RE_NO_BK_BRACES))
  5294. {
  5295. if (c != '\\') return REG_EBRACE;
  5296. PATFETCH (c);
  5297. }
  5298. if (c != '}')
  5299. {
  5300. if (syntax & RE_NO_BK_BRACES)
  5301. goto unfetch_interval;
  5302. else
  5303. return REG_BADBR;
  5304. }
  5305. /* We just parsed a valid interval. */
  5306. /* If it's invalid to have no preceding re. */
  5307. if (pointless_if_repeated (*last_expression, params))
  5308. {
  5309. if (syntax & RE_CONTEXT_INVALID_OPS)
  5310. return REG_BADRPT;
  5311. else if (!(syntax & RE_CONTEXT_INDEP_OPS))
  5312. goto unfetch_interval;
  5313. /* was: else laststart = b; */
  5314. }
  5315. /* If the upper bound is zero, don't want to iterate
  5316. * at all.
  5317. */
  5318. if (upper_bound == 0)
  5319. {
  5320. if (*last_expression)
  5321. {
  5322. rx_free_rexp (&rxb->rx, *last_expression);
  5323. *last_expression = 0;
  5324. }
  5325. }
  5326. else
  5327. /* Otherwise, we have a nontrivial interval. */
  5328. {
  5329. int iter_se = paramc;
  5330. int end_se = paramc + 1;
  5331. params = (params
  5332. ? ((struct re_se_params *)
  5333. realloc (params,
  5334. sizeof (*params) * (2 + paramc)))
  5335. : ((struct re_se_params *)
  5336. malloc (2 * sizeof (*params))));
  5337. if (!params)
  5338. return REG_ESPACE;
  5339. paramc += 2;
  5340. params [iter_se].se = re_se_iter;
  5341. params [iter_se].op1 = lower_bound;
  5342. params[iter_se].op2 = upper_bound;
  5343. params[end_se].se = re_se_end_iter;
  5344. params[end_se].op1 = lower_bound;
  5345. params[end_se].op2 = upper_bound;
  5346. {
  5347. struct rexp_node * push0
  5348. = rx_mk_r_side_effect (&rxb->rx,
  5349. (rx_side_effect)re_se_push0);
  5350. struct rexp_node * start_one_iter
  5351. = rx_mk_r_side_effect (&rxb->rx,
  5352. (rx_side_effect)iter_se);
  5353. struct rexp_node * phase1
  5354. = rx_mk_r_concat (&rxb->rx, start_one_iter,
  5355. *last_expression);
  5356. struct rexp_node * pushback
  5357. = rx_mk_r_side_effect (&rxb->rx,
  5358. (rx_side_effect)re_se_pushback);
  5359. rx_Bitset cs = rx_cset (&rxb->rx);
  5360. struct rexp_node * lit_t;
  5361. struct rexp_node * phase2;
  5362. struct rexp_node * loop;
  5363. struct rexp_node * push_n_loop;
  5364. struct rexp_node * final_test;
  5365. struct rexp_node * full_exp;
  5366. if (! cs)
  5367. return REG_ESPACE;
  5368. lit_t = rx_mk_r_cset (&rxb->rx, cs);
  5369. phase2 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
  5370. loop = rx_mk_r_2phase_star (&rxb->rx, phase1, phase2);
  5371. push_n_loop = rx_mk_r_concat (&rxb->rx, push0, loop);
  5372. final_test = rx_mk_r_side_effect (&rxb->rx,
  5373. (rx_side_effect)end_se);
  5374. full_exp = rx_mk_r_concat (&rxb->rx, push_n_loop, final_test);
  5375. if (!(push0 && start_one_iter && phase1
  5376. && pushback && lit_t && phase2
  5377. && loop && push_n_loop && final_test && full_exp))
  5378. return REG_ESPACE;
  5379. RX_bitset_enjoin(cs, 't');
  5380. *last_expression = full_exp;
  5381. }
  5382. }
  5383. beg_interval = 0;
  5384. }
  5385. break;
  5386. unfetch_interval:
  5387. /* If an invalid interval, match the characters as literals. */
  5388. p = beg_interval;
  5389. beg_interval = 0;
  5390. /* normal_char and normal_backslash need `c'. */
  5391. PATFETCH (c);
  5392. if (!(syntax & RE_NO_BK_BRACES))
  5393. {
  5394. if (p > pattern && p[-1] == '\\')
  5395. goto normal_backslash;
  5396. }
  5397. goto normal_char;
  5398. #ifdef emacs
  5399. /* There is no way to specify the before_dot and after_dot
  5400. operators. rms says this is ok. --karl */
  5401. case '=':
  5402. side = (rx_side_effect)rx_se_at_dot;
  5403. goto add_side_effect;
  5404. break;
  5405. case 's':
  5406. case 'S':
  5407. {
  5408. rx_Bitset cs = rx_cset (&rxb->rx);
  5409. struct rexp_node * set = rx_mk_r_cset (&rxb->rx, cs);
  5410. if (!(cs && set))
  5411. return REG_ESPACE;
  5412. if (c == 'S')
  5413. rx_bitset_universe (rxb->rx.local_cset_size, cs);
  5414. PATFETCH (c);
  5415. {
  5416. int x;
  5417. enum syntaxcode code = syntax_spec_code [c];
  5418. for (x = 0; x < 256; ++x)
  5419. {
  5420. if (SYNTAX (x) == code)
  5421. {
  5422. rx_Bitset it =
  5423. inverse_translation (rxb, validate_inv_tr,
  5424. inverse_translate,
  5425. translate, x);
  5426. rx_bitset_xor (rxb->rx.local_cset_size, cs, it);
  5427. }
  5428. }
  5429. }
  5430. append = set;
  5431. goto append_node;
  5432. }
  5433. break;
  5434. #endif /* emacs */
  5435. case 'w':
  5436. case 'W':
  5437. if (syntax & RE_NO_GNU_OPS)
  5438. goto normal_char;
  5439. {
  5440. rx_Bitset cs = rx_cset (&rxb->rx);
  5441. struct rexp_node * n = (cs ? rx_mk_r_cset (&rxb->rx, cs) : 0);
  5442. if (!(cs && n))
  5443. return REG_ESPACE;
  5444. if (c == 'W')
  5445. rx_bitset_universe (rxb->rx.local_cset_size ,cs);
  5446. {
  5447. int x;
  5448. for (x = rxb->rx.local_cset_size - 1; x > 0; --x)
  5449. if (SYNTAX(x) & Sword)
  5450. RX_bitset_toggle (cs, x);
  5451. }
  5452. append = n;
  5453. goto append_node;
  5454. }
  5455. break;
  5456. /* With a little extra work, some of these side effects could be optimized
 * away (basically by looking at what we already know about the surrounding
  5458. * chars).
  5459. */
  5460. case '<':
  5461. if (syntax & RE_NO_GNU_OPS)
  5462. goto normal_char;
  5463. side = (rx_side_effect)re_se_wordbeg;
  5464. goto add_side_effect;
  5465. break;
  5466. case '>':
  5467. if (syntax & RE_NO_GNU_OPS)
  5468. goto normal_char;
  5469. side = (rx_side_effect)re_se_wordend;
  5470. goto add_side_effect;
  5471. break;
  5472. case 'b':
  5473. if (syntax & RE_NO_GNU_OPS)
  5474. goto normal_char;
  5475. side = (rx_side_effect)re_se_wordbound;
  5476. goto add_side_effect;
  5477. break;
  5478. case 'B':
  5479. if (syntax & RE_NO_GNU_OPS)
  5480. goto normal_char;
  5481. side = (rx_side_effect)re_se_notwordbound;
  5482. goto add_side_effect;
  5483. break;
  5484. case '`':
  5485. if (syntax & RE_NO_GNU_OPS)
  5486. goto normal_char;
  5487. side = (rx_side_effect)re_se_begbuf;
  5488. goto add_side_effect;
  5489. break;
  5490. case '\'':
  5491. if (syntax & RE_NO_GNU_OPS)
  5492. goto normal_char;
  5493. side = (rx_side_effect)re_se_endbuf;
  5494. goto add_side_effect;
  5495. break;
  5496. add_side_effect:
  5497. {
  5498. struct rexp_node * se
  5499. = rx_mk_r_side_effect (&rxb->rx, side);
  5500. if (!se)
  5501. return REG_ESPACE;
  5502. append = se;
  5503. goto append_node;
  5504. }
  5505. break;
  5506. case '1': case '2': case '3': case '4': case '5':
  5507. case '6': case '7': case '8': case '9':
  5508. if (syntax & RE_NO_BK_REFS)
  5509. goto normal_char;
  5510. c1 = c - '0';
  5511. if (c1 > regnum)
  5512. return REG_ESUBREG;
  5513. /* Can't back reference to a subexpression if inside of it. */
  5514. if (group_in_compile_stack (compile_stack, c1))
  5515. return REG_ESUBREG;
  5516. {
  5517. int backref_se = paramc;
  5518. params = (params
  5519. ? ((struct re_se_params *)
  5520. realloc (params,
  5521. sizeof (*params) * (1 + paramc)))
  5522. : ((struct re_se_params *)
  5523. malloc (sizeof (*params))));
  5524. if (!params)
  5525. return REG_ESPACE;
  5526. ++paramc;
  5527. params[backref_se].se = re_se_backref;
  5528. params[backref_se].op1 = c1;
  5529. side = (rx_side_effect)backref_se;
  5530. goto add_side_effect;
  5531. }
  5532. break;
  5533. case '+':
  5534. case '?':
  5535. if (syntax & RE_BK_PLUS_QM)
  5536. goto handle_plus;
  5537. else
  5538. goto normal_backslash;
  5539. default:
  5540. normal_backslash:
  5541. /* You might think it would be useful for \ to mean
  5542. not to translate; but if we don't translate it
  5543. it will never match anything. */
  5544. c = TRANSLATE (c);
  5545. goto normal_char;
  5546. }
  5547. break;
  5548. default:
  5549. /* Expects the character in `c'. */
  5550. normal_char:
  5551. {
  5552. rx_Bitset cs = rx_cset(&rxb->rx);
  5553. struct rexp_node * match = rx_mk_r_cset (&rxb->rx, cs);
  5554. rx_Bitset it;
  5555. if (!(cs && match))
  5556. return REG_ESPACE;
  5557. it = inverse_translation (rxb, validate_inv_tr,
  5558. inverse_translate, translate, c);
  5559. rx_bitset_union (CHAR_SET_SIZE, cs, it);
  5560. append = match;
  5561. append_node:
/* This generically appends the rexp APPEND to *LAST_EXPRESSION
  5563. * and then parses the next character normally.
  5564. */
  5565. if (*last_expression)
  5566. {
  5567. struct rexp_node * concat
  5568. = rx_mk_r_concat (&rxb->rx, *last_expression, append);
  5569. if (!concat)
  5570. return REG_ESPACE;
  5571. *last_expression = concat;
  5572. last_expression = &concat->params.pair.right;
  5573. }
  5574. else
  5575. *last_expression = append;
  5576. }
  5577. } /* switch (c) */
  5578. } /* while p != pend */
  5579. {
  5580. int win_se = paramc;
  5581. params = (params
  5582. ? ((struct re_se_params *)
  5583. realloc (params,
  5584. sizeof (*params) * (1 + paramc)))
  5585. : ((struct re_se_params *)
  5586. malloc (sizeof (*params))));
  5587. if (!params)
  5588. return REG_ESPACE;
  5589. ++paramc;
  5590. params[win_se].se = re_se_win;
  5591. {
  5592. struct rexp_node * se
  5593. = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)win_se);
  5594. struct rexp_node * concat
  5595. = rx_mk_r_concat (&rxb->rx, rexp, se);
  5596. if (!(se && concat))
  5597. return REG_ESPACE;
  5598. rexp = concat;
  5599. }
  5600. }
  5601. /* Through the pattern now. */
  5602. if (!COMPILE_STACK_EMPTY)
  5603. return REG_EPAREN;
  5604. free (compile_stack.stack);
  5605. orig_rexp = rexp;
  5606. #ifdef RX_DEBUG
  5607. if (rx_debug_compile)
  5608. {
  5609. dbug_rxb = rxb;
  5610. fputs ("\n\nCompiling ", stdout);
  5611. fwrite (pattern, 1, size, stdout);
  5612. fputs (":\n", stdout);
  5613. rxb->se_params = params;
  5614. print_rexp (&rxb->rx, orig_rexp, 2, re_seprint, stdout);
  5615. }
  5616. #endif
  5617. {
  5618. rx_Bitset cs = rx_cset(&rxb->rx);
  5619. rx_Bitset cs2 = rx_cset(&rxb->rx);
  5620. char * se_map = (char *) alloca (paramc);
  5621. struct rexp_node * new_rexp = 0;
  5622. bzero (se_map, paramc);
  5623. find_backrefs (se_map, rexp, params);
  5624. fewer_side_effects =
  5625. remove_unecessary_side_effects (&rxb->rx, se_map,
  5626. rx_copy_rexp (&rxb->rx, rexp), params);
  5627. speed_up_alt (&rxb->rx, rexp, 0);
  5628. speed_up_alt (&rxb->rx, fewer_side_effects, 1);
  5629. {
  5630. char * syntax_parens = rxb->syntax_parens;
  5631. if (syntax_parens == (char *)0x1)
  5632. rexp = remove_unecessary_side_effects
  5633. (&rxb->rx, se_map, rexp, params);
  5634. else if (syntax_parens)
  5635. {
  5636. int x;
  5637. for (x = 0; x < paramc; ++x)
  5638. if (( (params[x].se == re_se_lparen)
  5639. || (params[x].se == re_se_rparen))
  5640. && (!syntax_parens [params[x].op1]))
  5641. se_map [x] = 1;
  5642. rexp = remove_unecessary_side_effects
  5643. (&rxb->rx, se_map, rexp, params);
  5644. }
  5645. }
  5646. /* At least one more optimization would be nice to have here but i ran out
  5647. * of time. The idea would be to delay side effects.
 * For example, `(abc)' is the same thing as `abc()' except that the
  5649. * left paren is offset by 3 (which we know at compile time).
  5650. * (In this comment, write that second pattern `abc(:3:)'
  5651. * where `(:3:' is a syntactic unit.)
  5652. *
  5653. * Trickier: `(abc|defg)' is the same as `(abc(:3:|defg(:4:))'
  5654. * (The paren nesting may be hard to follow -- that's an alternation
  5655. * of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens
  5656. * followed by the closing paren from the original expression.)
  5657. *
 * Neither the expression tree representation nor the nfa makes
  5659. * this very easy to write. :(
  5660. */
  5661. /* What we compile is different than what the parser returns.
  5662. * Suppose the parser returns expression R.
  5663. * Let R' be R with unnecessary register assignments removed
  5664. * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above).
  5665. *
  5666. * What we will compile is the expression:
  5667. *
  5668. * m{try}R{win}\|s{try}R'{win}
  5669. *
  5670. * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE).
  5671. *
  5672. * When trying a match, we insert an `m' at the beginning of the
  5673. * string if the user wants registers to be filled, `s' if not.
  5674. */
  5675. new_rexp =
  5676. rx_mk_r_alternate
  5677. (&rxb->rx,
  5678. rx_mk_r_concat (&rxb->rx, rx_mk_r_cset (&rxb->rx, cs2), rexp),
  5679. rx_mk_r_concat (&rxb->rx,
  5680. rx_mk_r_cset (&rxb->rx, cs), fewer_side_effects));
  5681. if (!(new_rexp && cs && cs2))
  5682. return REG_ESPACE;
  5683. RX_bitset_enjoin (cs2, '\0'); /* prefixed to the rexp used for matching. */
  5684. RX_bitset_enjoin (cs, '\1'); /* prefixed to the rexp used for searching. */
  5685. rexp = new_rexp;
  5686. }
  5687. #ifdef RX_DEBUG
  5688. if (rx_debug_compile)
  5689. {
  5690. fputs ("\n...which is compiled as:\n", stdout);
  5691. print_rexp (&rxb->rx, rexp, 2, re_seprint, stdout);
  5692. }
  5693. #endif
  5694. {
  5695. struct rx_nfa_state *start = 0;
  5696. struct rx_nfa_state *end = 0;
  5697. if (!rx_build_nfa (&rxb->rx, rexp, &start, &end))
  5698. return REG_ESPACE; /* */
  5699. else
  5700. {
  5701. void * mem = (void *)rxb->buffer;
  5702. unsigned long size = rxb->allocated;
  5703. int start_id;
  5704. char * perm_mem;
  5705. int iterator_size = paramc * sizeof (params[0]);
  5706. end->is_final = 1;
  5707. start->is_start = 1;
  5708. rx_name_nfa_states (&rxb->rx);
  5709. start_id = start->id;
  5710. #ifdef RX_DEBUG
  5711. if (rx_debug_compile)
  5712. {
  5713. fputs ("...giving the NFA: \n", stdout);
  5714. dbug_rxb = rxb;
  5715. print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
  5716. }
  5717. #endif
  5718. if (!rx_eclose_nfa (&rxb->rx))
  5719. return REG_ESPACE;
  5720. else
  5721. {
  5722. rx_delete_epsilon_transitions (&rxb->rx);
/* For compatibility reasons, we need to shove the
  5724. * compiled nfa into one chunk of malloced memory.
  5725. */
  5726. rxb->rx.reserved = ( sizeof (params[0]) * paramc
  5727. + rx_sizeof_bitset (rxb->rx.local_cset_size));
  5728. #ifdef RX_DEBUG
  5729. if (rx_debug_compile)
  5730. {
  5731. dbug_rxb = rxb;
  5732. fputs ("...which cooks down (uncompactified) to: \n", stdout);
  5733. print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
  5734. }
  5735. #endif
  5736. if (!rx_compactify_nfa (&rxb->rx, &mem, &size))
  5737. return REG_ESPACE;
  5738. rxb->buffer = mem;
  5739. rxb->allocated = size;
  5740. rxb->rx.buffer = mem;
  5741. rxb->rx.allocated = size;
  5742. perm_mem = ((char *)rxb->rx.buffer
  5743. + rxb->rx.allocated - rxb->rx.reserved);
  5744. rxb->se_params = ((struct re_se_params *)perm_mem);
  5745. bcopy (params, rxb->se_params, iterator_size);
  5746. perm_mem += iterator_size;
  5747. rxb->fastset = (rx_Bitset) perm_mem;
  5748. rxb->start = rx_id_to_nfa_state (&rxb->rx, start_id);
  5749. }
  5750. rx_bitset_null (rxb->rx.local_cset_size, rxb->fastset);
  5751. rxb->can_match_empty = compute_fastset (rxb, orig_rexp);
  5752. rxb->match_regs_on_stack =
  5753. registers_on_stack (rxb, orig_rexp, 0, params);
  5754. rxb->search_regs_on_stack =
  5755. registers_on_stack (rxb, fewer_side_effects, 0, params);
  5756. if (rxb->can_match_empty)
  5757. rx_bitset_universe (rxb->rx.local_cset_size, rxb->fastset);
  5758. rxb->is_anchored = is_anchored (orig_rexp, (rx_side_effect) re_se_hat);
  5759. rxb->begbuf_only = is_anchored (orig_rexp,
  5760. (rx_side_effect) re_se_begbuf);
  5761. }
  5762. rx_free_rexp (&rxb->rx, rexp);
  5763. if (params)
  5764. free (params);
  5765. #ifdef RX_DEBUG
  5766. if (rx_debug_compile)
  5767. {
  5768. dbug_rxb = rxb;
  5769. fputs ("...which cooks down to: \n", stdout);
  5770. print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
  5771. }
  5772. #endif
  5773. }
  5774. return REG_NOERROR;
  5775. }
  5776. /* This table gives an error message for each of the error codes listed
  5777. in regex.h. Obviously the order here has to be same as there. */
  5778. __const__ char * rx_error_msg[] =
  5779. { 0, /* REG_NOERROR */
  5780. "No match", /* REG_NOMATCH */
  5781. "Invalid regular expression", /* REG_BADPAT */
  5782. "Invalid collation character", /* REG_ECOLLATE */
  5783. "Invalid character class name", /* REG_ECTYPE */
  5784. "Trailing backslash", /* REG_EESCAPE */
  5785. "Invalid back reference", /* REG_ESUBREG */
  5786. "Unmatched [ or [^", /* REG_EBRACK */
  5787. "Unmatched ( or \\(", /* REG_EPAREN */
  5788. "Unmatched \\{", /* REG_EBRACE */
  5789. "Invalid content of \\{\\}", /* REG_BADBR */
  5790. "Invalid range end", /* REG_ERANGE */
  5791. "Memory exhausted", /* REG_ESPACE */
  5792. "Invalid preceding regular expression", /* REG_BADRPT */
  5793. "Premature end of regular expression", /* REG_EEND */
  5794. "Regular expression too big", /* REG_ESIZE */
  5795. "Unmatched ) or \\)", /* REG_ERPAREN */
  5796. };
  5797. char rx_slowmap [256] =
  5798. {
  5799. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5800. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5801. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5802. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5803. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5804. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5805. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5806. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5807. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5808. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5809. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5810. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5811. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5812. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5813. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5814. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  5815. };
  5816. #ifdef __STDC__
  5817. RX_DECL void
  5818. rx_blow_up_fastmap (struct re_pattern_buffer * rxb)
  5819. #else
  5820. RX_DECL void
  5821. rx_blow_up_fastmap (rxb)
  5822. struct re_pattern_buffer * rxb;
  5823. #endif
  5824. {
  5825. int x;
  5826. for (x = 0; x < 256; ++x) /* &&&& 3.6 % */
  5827. rxb->fastmap [x] = !!RX_bitset_member (rxb->fastset, x);
  5828. rxb->fastmap_accurate = 1;
  5829. }
  5830. #if !defined(REGEX_MALLOC) && !defined(__GNUC__)
  5831. #define RE_SEARCH_2_FN inner_re_search_2
  5832. #define RE_S2_QUAL static
  5833. #else
  5834. #define RE_SEARCH_2_FN re_search_2
  5835. #define RE_S2_QUAL
  5836. #endif
  5837. struct re_search_2_closure
  5838. {
  5839. __const__ char * string1;
  5840. int size1;
  5841. __const__ char * string2;
  5842. int size2;
  5843. };
  5844. RE_S2_QUAL int
  5845. RE_SEARCH_2_FN (struct re_pattern_buffer *,
  5846. __const__ char *,
  5847. int, __const__ char *, int, int,
  5848. int, struct re_registers *, int);
  5849. int re_rx_search (struct re_pattern_buffer *, int,
  5850. int, int, int, rx_get_burst_fn,
  5851. rx_back_check_fn, rx_fetch_char_fn,
  5852. void *, struct re_registers *,
  5853. struct rx_search_state *,
  5854. struct rx_search_state *);
  5855. #if !defined(REGEX_MALLOC) && !defined(__GNUC__)
  5856. int re_search_2 (struct re_pattern_buffer *,
  5857. __const__ char *, int,
  5858. __const__ char *, int,
  5859. int, int, struct re_registers *,
  5860. int);
  5861. #endif
  5862. int re_search (struct re_pattern_buffer *,
  5863. __const__ char *, int, int, int,
  5864. struct re_registers *);
  5865. int re_match_2 (struct re_pattern_buffer *,
  5866. __const__ char *, int,
  5867. __const__ char *, int,
  5868. int, struct re_registers *, int);
  5869. int re_match (struct re_pattern_buffer *,
  5870. __const__ char *, int, int,
  5871. struct re_registers *);
  5872. reg_syntax_t re_set_syntax (reg_syntax_t);
  5873. void re_set_registers (struct re_pattern_buffer *,
  5874. struct re_registers *, unsigned,
  5875. regoff_t *, regoff_t *);
  5876. static int cplx_se_sublist_len (struct rx_se_list *);
  5877. static int posix_se_list_order (struct rx *, struct rx_se_list *,
  5878. struct rx_se_list *);
  5879. __const__ char
  5880. *re_compile_pattern (__const__ char *, int,
  5881. struct re_pattern_buffer *);
  5882. int re_compile_fastmap (struct re_pattern_buffer *);
  5883. char *re_comp (__const__ char *);
  5884. int re_exec (__const__ char *);
  5885. int regcomp (regex_t *, __const__ char *, int);
  5886. int regexec (__const__ regex_t *,
  5887. __const__ char *, size_t,
  5888. regmatch_t pmatch[], int);
  5889. size_t regerror (int, __const__ regex_t *,
  5890. char *, size_t);
  5891. #ifdef __STDC__
  5892. static __inline__ enum rx_get_burst_return
  5893. re_search_2_get_burst ( struct rx_string_position * pos,
  5894. void * vclosure, int stop )
  5895. #else
  5896. static __inline__ enum rx_get_burst_return
  5897. re_search_2_get_burst (pos, vclosure, stop)
  5898. struct rx_string_position * pos;
  5899. void * vclosure;
  5900. int stop;
  5901. #endif
  5902. {
  5903. struct re_search_2_closure * closure;
  5904. closure = (struct re_search_2_closure *)vclosure;
  5905. if (!closure->string2)
  5906. {
  5907. int inset;
  5908. inset = pos->pos - pos->string;
  5909. if ((inset < -1) || (inset > closure->size1))
  5910. return rx_get_burst_no_more;
  5911. else
  5912. {
  5913. pos->pos = (__const__ unsigned char *) closure->string1 + inset;
  5914. pos->string = (__const__ unsigned char *) closure->string1;
  5915. pos->size = closure->size1;
  5916. pos->end = ((__const__ unsigned char *)
  5917. MIN(closure->string1 + closure->size1,
  5918. closure->string1 + stop));
  5919. pos->offset = 0;
  5920. return ((pos->pos < pos->end)
  5921. ? rx_get_burst_ok
  5922. : rx_get_burst_no_more);
  5923. }
  5924. }
  5925. else if (!closure->string1)
  5926. {
  5927. int inset;
  5928. inset = pos->pos - pos->string;
  5929. pos->pos = (__const__ unsigned char *) closure->string2 + inset;
  5930. pos->string = (__const__ unsigned char *) closure->string2;
  5931. pos->size = closure->size2;
  5932. pos->end = ((__const__ unsigned char *)
  5933. MIN(closure->string2 + closure->size2,
  5934. closure->string2 + stop));
  5935. pos->offset = 0;
  5936. return ((pos->pos < pos->end)
  5937. ? rx_get_burst_ok
  5938. : rx_get_burst_no_more);
  5939. }
  5940. else
  5941. {
  5942. int inset;
  5943. inset = pos->pos - pos->string + pos->offset;
  5944. if (inset < closure->size1)
  5945. {
  5946. pos->pos = (__const__ unsigned char *) closure->string1 + inset;
  5947. pos->string = (__const__ unsigned char *) closure->string1;
  5948. pos->size = closure->size1;
  5949. pos->end = ((__const__ unsigned char *)
  5950. MIN(closure->string1 + closure->size1,
  5951. closure->string1 + stop));
  5952. pos->offset = 0;
  5953. return rx_get_burst_ok;
  5954. }
  5955. else
  5956. {
  5957. pos->pos = ((__const__ unsigned char *)
  5958. closure->string2 + inset - closure->size1);
  5959. pos->string = (__const__ unsigned char *) closure->string2;
  5960. pos->size = closure->size2;
  5961. pos->end = ((__const__ unsigned char *)
  5962. MIN(closure->string2 + closure->size2,
  5963. closure->string2 + stop - closure->size1));
  5964. pos->offset = closure->size1;
  5965. return ((pos->pos < pos->end)
  5966. ? rx_get_burst_ok
  5967. : rx_get_burst_no_more);
  5968. }
  5969. }
  5970. }
  5971. #ifdef __STDC__
  5972. static __inline__ enum rx_back_check_return
  5973. re_search_2_back_check ( struct rx_string_position * pos,
  5974. int lparen, int rparen, unsigned char * translate,
  5975. void * vclosure, int stop )
  5976. #else
  5977. static __inline__ enum rx_back_check_return
  5978. re_search_2_back_check (pos, lparen, rparen, translate, vclosure, stop)
  5979. struct rx_string_position * pos;
  5980. int lparen;
  5981. int rparen;
  5982. unsigned char * translate;
  5983. void * vclosure;
  5984. int stop;
  5985. #endif
  5986. {
  5987. struct rx_string_position there;
  5988. struct rx_string_position past;
  5989. there = *pos;
  5990. there.pos = there.string + lparen - there.offset;
  5991. re_search_2_get_burst (&there, vclosure, stop);
  5992. past = *pos;
  5993. past.pos = past.string + rparen - there.offset;
  5994. re_search_2_get_burst (&past, vclosure, stop);
  5995. ++pos->pos;
  5996. re_search_2_get_burst (pos, vclosure, stop);
  5997. while ( (there.pos != past.pos)
  5998. && (pos->pos != pos->end))
  5999. if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos))
  6000. return rx_back_check_fail;
  6001. else
  6002. {
  6003. ++there.pos;
  6004. ++pos->pos;
  6005. if (there.pos == there.end)
  6006. re_search_2_get_burst (&there, vclosure, stop);
  6007. if (pos->pos == pos->end)
  6008. re_search_2_get_burst (pos, vclosure, stop);
  6009. }
  6010. if (there.pos != past.pos)
  6011. return rx_back_check_fail;
  6012. --pos->pos;
  6013. re_search_2_get_burst (pos, vclosure, stop);
  6014. return rx_back_check_pass;
  6015. }
  6016. #ifdef __STDC__
  6017. static __inline__ int
  6018. re_search_2_fetch_char ( struct rx_string_position * pos, int offset,
  6019. void * app_closure, int stop )
  6020. #else
  6021. static __inline__ int
  6022. re_search_2_fetch_char (pos, offset, app_closure, stop)
  6023. struct rx_string_position * pos;
  6024. int offset;
  6025. void * app_closure;
  6026. int stop;
  6027. #endif
  6028. {
  6029. struct re_search_2_closure * closure;
  6030. closure = (struct re_search_2_closure *)app_closure;
  6031. if (offset == 0)
  6032. {
  6033. if (pos->pos >= pos->string)
  6034. return *pos->pos;
  6035. else
  6036. {
  6037. if ( (pos->string == (__const__ unsigned char *) closure->string2)
  6038. && (closure->string1)
  6039. && (closure->size1))
  6040. return closure->string1[closure->size1 - 1];
  6041. else
  6042. return 0; /* sure, why not. */
  6043. }
  6044. }
  6045. if (pos->pos == pos->end)
  6046. return *closure->string2;
  6047. else
  6048. #if 0
  6049. return pos->pos[1];
  6050. #else
  6051. return pos->pos[offset]; /* FIXME */
  6052. #endif
  6053. }
  6054. #ifdef __STDC__
  6055. RE_S2_QUAL int
  6056. RE_SEARCH_2_FN (struct re_pattern_buffer *rxb,
  6057. __const__ char * string1, int size1,
  6058. __const__ char * string2, int size2,
  6059. int startpos, int range,
  6060. struct re_registers *regs,
  6061. int stop)
  6062. #else
  6063. RE_S2_QUAL int
  6064. RE_SEARCH_2_FN (rxb,
  6065. string1, size1, string2, size2, startpos, range, regs, stop)
  6066. struct re_pattern_buffer *rxb;
  6067. __const__ char * string1;
  6068. int size1;
  6069. __const__ char * string2;
  6070. int size2;
  6071. int startpos;
  6072. int range;
  6073. struct re_registers *regs;
  6074. int stop;
  6075. #endif
  6076. {
  6077. int answer;
  6078. struct re_search_2_closure closure;
  6079. closure.string1 = string1;
  6080. closure.size1 = size1;
  6081. closure.string2 = string2;
  6082. closure.size2 = size2;
  6083. answer = rx_search (rxb, startpos, range, stop, size1 + size2,
  6084. re_search_2_get_burst,
  6085. re_search_2_back_check,
  6086. re_search_2_fetch_char,
  6087. (void *)&closure,
  6088. regs,
  6089. 0,
  6090. 0);
  6091. switch (answer)
  6092. {
  6093. case rx_search_continuation:
  6094. abort ();
  6095. case rx_search_error:
  6096. return -2;
  6097. case rx_search_soft_fail:
  6098. case rx_search_fail:
  6099. return -1;
  6100. default:
  6101. return answer;
  6102. }
  6103. }
  6104. /* Export rx_search to callers outside this file. */
  6105. #ifdef __STDC__
  6106. int
  6107. re_rx_search ( struct re_pattern_buffer * rxb, int startpos, int range,
  6108. int stop, int total_size, rx_get_burst_fn get_burst,
  6109. rx_back_check_fn back_check, rx_fetch_char_fn fetch_char,
  6110. void * app_closure, struct re_registers * regs,
  6111. struct rx_search_state * resume_state,
  6112. struct rx_search_state * save_state )
  6113. #else
  6114. int
  6115. re_rx_search (rxb, startpos, range, stop, total_size,
  6116. get_burst, back_check, fetch_char,
  6117. app_closure, regs, resume_state, save_state)
  6118. struct re_pattern_buffer * rxb;
  6119. int startpos;
  6120. int range;
  6121. int stop;
  6122. int total_size;
  6123. rx_get_burst_fn get_burst;
  6124. rx_back_check_fn back_check;
  6125. rx_fetch_char_fn fetch_char;
  6126. void * app_closure;
  6127. struct re_registers * regs;
  6128. struct rx_search_state * resume_state;
  6129. struct rx_search_state * save_state;
  6130. #endif
  6131. {
  6132. return rx_search (rxb, startpos, range, stop, total_size,
  6133. get_burst, back_check, fetch_char, app_closure,
  6134. regs, resume_state, save_state);
  6135. }
  6136. #if !defined(REGEX_MALLOC) && !defined(__GNUC__)
  6137. #ifdef __STDC__
  6138. int
  6139. re_search_2 (struct re_pattern_buffer *rxb,
  6140. __const__ char * string1, int size1,
  6141. __const__ char * string2, int size2,
  6142. int startpos, int range,
  6143. struct re_registers *regs,
  6144. int stop)
  6145. #else
  6146. int
  6147. re_search_2 (rxb, string1, size1, string2, size2, startpos, range, regs, stop)
  6148. struct re_pattern_buffer *rxb;
  6149. __const__ char * string1;
  6150. int size1;
  6151. __const__ char * string2;
  6152. int size2;
  6153. int startpos;
  6154. int range;
  6155. struct re_registers *regs;
  6156. int stop;
  6157. #endif
  6158. {
  6159. int ret;
  6160. ret = inner_re_search_2 (rxb, string1, size1, string2, size2, startpos,
  6161. range, regs, stop);
  6162. alloca (0);
  6163. return ret;
  6164. }
  6165. #endif
  6166. /* Like re_search_2, above, but only one string is specified, and
  6167. * doesn't let you say where to stop matching.
  6168. */
  6169. #ifdef __STDC__
  6170. int
  6171. re_search (struct re_pattern_buffer * rxb, __const__ char *string,
  6172. int size, int startpos, int range,
  6173. struct re_registers *regs)
  6174. #else
  6175. int
  6176. re_search (rxb, string, size, startpos, range, regs)
  6177. struct re_pattern_buffer * rxb;
  6178. __const__ char * string;
  6179. int size;
  6180. int startpos;
  6181. int range;
  6182. struct re_registers *regs;
  6183. #endif
  6184. {
  6185. return re_search_2 (rxb, 0, 0, string, size, startpos, range, regs, size);
  6186. }
  6187. #ifdef __STDC__
  6188. int
  6189. re_match_2 (struct re_pattern_buffer * rxb,
  6190. __const__ char * string1, int size1,
  6191. __const__ char * string2, int size2,
  6192. int pos, struct re_registers *regs, int stop)
  6193. #else
  6194. int
  6195. re_match_2 (rxb, string1, size1, string2, size2, pos, regs, stop)
  6196. struct re_pattern_buffer * rxb;
  6197. __const__ char * string1;
  6198. int size1;
  6199. __const__ char * string2;
  6200. int size2;
  6201. int pos;
  6202. struct re_registers *regs;
  6203. int stop;
  6204. #endif
  6205. {
  6206. struct re_registers some_regs;
  6207. regoff_t start;
  6208. regoff_t end;
  6209. int srch;
  6210. int save = rxb->regs_allocated;
  6211. struct re_registers * regs_to_pass = regs;
  6212. char *old_fastmap = rxb->fastmap;
  6213. if (!regs)
  6214. {
  6215. some_regs.start = &start;
  6216. some_regs.end = &end;
  6217. some_regs.num_regs = 1;
  6218. regs_to_pass = &some_regs;
  6219. rxb->regs_allocated = REGS_FIXED;
  6220. }
  6221. rxb->fastmap = NULL;
  6222. srch = re_search_2 (rxb, string1, size1, string2, size2,
  6223. pos, 1, regs_to_pass, stop);
  6224. rxb->fastmap = old_fastmap;
  6225. if (regs_to_pass != regs)
  6226. rxb->regs_allocated = save;
  6227. if (srch < 0)
  6228. return srch;
  6229. return regs_to_pass->end[0] - regs_to_pass->start[0];
  6230. }
  6231. /* re_match is like re_match_2 except it takes only a single string. */
  6232. #ifdef __STDC__
  6233. int
  6234. re_match (struct re_pattern_buffer * rxb,
  6235. __const__ char * string,
  6236. int size, int pos,
  6237. struct re_registers *regs)
  6238. #else
  6239. int
  6240. re_match (rxb, string, size, pos, regs)
  6241. struct re_pattern_buffer * rxb;
  6242. __const__ char *string;
  6243. int size;
  6244. int pos;
  6245. struct re_registers *regs;
  6246. #endif
  6247. {
  6248. return re_match_2 (rxb, string, size, 0, 0, pos, regs, size);
  6249. }
  6250. /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
  6251. also be assigned to arbitrarily: each pattern buffer stores its own
  6252. syntax, so it can be changed between regex compilations. */
  6253. reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
  6254. /* Specify the precise syntax of regexps for compilation. This provides
  6255. for compatibility for various utilities which historically have
  6256. different, incompatible syntaxes.
  6257. The argument SYNTAX is a bit mask comprised of the various bits
  6258. defined in regex.h. We return the old syntax. */
  6259. #ifdef __STDC__
  6260. reg_syntax_t
  6261. re_set_syntax (reg_syntax_t syntax)
  6262. #else
  6263. reg_syntax_t
  6264. re_set_syntax (syntax)
  6265. reg_syntax_t syntax;
  6266. #endif
  6267. {
  6268. reg_syntax_t ret = re_syntax_options;
  6269. re_syntax_options = syntax;
  6270. return ret;
  6271. }
  6272. /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
  6273. ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
  6274. this memory for recording register information. STARTS and ENDS
  6275. must be allocated using the malloc library routine, and must each
  6276. be at least NUM_REGS * sizeof (regoff_t) bytes long.
  6277. If NUM_REGS == 0, then subsequent matches should allocate their own
  6278. register data.
  6279. Unless this function is called, the first search or match using
  6280. PATTERN_BUFFER will allocate its own register data, without
  6281. freeing the old data. */
  6282. #ifdef __STDC__
  6283. void
  6284. re_set_registers (struct re_pattern_buffer *bufp,
  6285. struct re_registers *regs,
  6286. unsigned num_regs,
  6287. regoff_t * starts, regoff_t * ends)
  6288. #else
  6289. void
  6290. re_set_registers (bufp, regs, num_regs, starts, ends)
  6291. struct re_pattern_buffer *bufp;
  6292. struct re_registers *regs;
  6293. unsigned num_regs;
  6294. regoff_t * starts;
  6295. regoff_t * ends;
  6296. #endif
  6297. {
  6298. if (num_regs)
  6299. {
  6300. bufp->regs_allocated = REGS_REALLOCATE;
  6301. regs->num_regs = num_regs;
  6302. regs->start = starts;
  6303. regs->end = ends;
  6304. }
  6305. else
  6306. {
  6307. bufp->regs_allocated = REGS_UNALLOCATED;
  6308. regs->num_regs = 0;
  6309. regs->start = regs->end = (regoff_t) 0;
  6310. }
  6311. }
  6312. #ifdef __STDC__
  6313. static int
  6314. cplx_se_sublist_len (struct rx_se_list * list)
  6315. #else
  6316. static int
  6317. cplx_se_sublist_len (list)
  6318. struct rx_se_list * list;
  6319. #endif
  6320. {
  6321. int x = 0;
  6322. while (list)
  6323. {
  6324. if ((long)list->car >= 0)
  6325. ++x;
  6326. list = list->cdr;
  6327. }
  6328. return x;
  6329. }
  6330. /* For rx->se_list_cmp */
  6331. #ifdef __STDC__
  6332. static int
  6333. posix_se_list_order (struct rx * rx,
  6334. struct rx_se_list * a, struct rx_se_list * b)
  6335. #else
  6336. static int
  6337. posix_se_list_order (rx, a, b)
  6338. struct rx * rx;
  6339. struct rx_se_list * a;
  6340. struct rx_se_list * b;
  6341. #endif
  6342. {
  6343. int al = cplx_se_sublist_len (a);
  6344. int bl = cplx_se_sublist_len (b);
  6345. if (!al && !bl)
  6346. return ((a == b)
  6347. ? 0
  6348. : ((a < b) ? -1 : 1));
  6349. else if (!al)
  6350. return -1;
  6351. else if (!bl)
  6352. return 1;
  6353. else
  6354. {
  6355. rx_side_effect * av = ((rx_side_effect *)
  6356. alloca (sizeof (rx_side_effect) * (al + 1)));
  6357. rx_side_effect * bv = ((rx_side_effect *)
  6358. alloca (sizeof (rx_side_effect) * (bl + 1)));
  6359. struct rx_se_list * ap = a;
  6360. struct rx_se_list * bp = b;
  6361. int ai, bi;
  6362. for (ai = al - 1; ai >= 0; --ai)
  6363. {
  6364. while ((long)ap->car < 0)
  6365. ap = ap->cdr;
  6366. av[ai] = ap->car;
  6367. ap = ap->cdr;
  6368. }
  6369. av[al] = (rx_side_effect)-2;
  6370. for (bi = bl - 1; bi >= 0; --bi)
  6371. {
  6372. while ((long)bp->car < 0)
  6373. bp = bp->cdr;
  6374. bv[bi] = bp->car;
  6375. bp = bp->cdr;
  6376. }
  6377. bv[bl] = (rx_side_effect)-1;
  6378. {
  6379. int ret;
  6380. int x = 0;
  6381. while (av[x] == bv[x])
  6382. ++x;
  6383. ret = (((unsigned *)(av[x]) < (unsigned *)(bv[x])) ? -1 : 1);
  6384. return ret;
  6385. }
  6386. }
  6387. }
  6388. /* re_compile_pattern is the GNU regular expression compiler: it
  6389. compiles PATTERN (of length SIZE) and puts the result in RXB.
  6390. Returns 0 if the pattern was valid, otherwise an error string.
  6391. Assumes the `allocated' (and perhaps `buffer') and `translate' fields
  6392. are set in RXB on entry.
  6393. We call rx_compile to do the actual compilation. */
  6394. #ifdef __STDC__
  6395. __const__ char *
  6396. re_compile_pattern (__const__ char *pattern,
  6397. int length,
  6398. struct re_pattern_buffer * rxb)
  6399. #else
  6400. __const__ char *
  6401. re_compile_pattern (pattern, length, rxb)
  6402. __const__ char *pattern;
  6403. int length;
  6404. struct re_pattern_buffer * rxb;
  6405. #endif
  6406. {
  6407. reg_errcode_t ret;
  6408. /* GNU code is written to assume at least RE_NREGS registers will be set
  6409. (and at least one extra will be -1). */
  6410. rxb->regs_allocated = REGS_UNALLOCATED;
  6411. /* And GNU code determines whether or not to get register information
  6412. by passing null for the REGS argument to re_match, etc., not by
  6413. setting no_sub. */
  6414. rxb->no_sub = 0;
  6415. rxb->rx.local_cset_size = 256;
  6416. /* Match anchors at newline. */
  6417. rxb->newline_anchor = 1;
  6418. rxb->re_nsub = 0;
  6419. rxb->start = 0;
  6420. rxb->se_params = 0;
  6421. rxb->rx.nodec = 0;
  6422. rxb->rx.epsnodec = 0;
  6423. rxb->rx.instruction_table = 0;
  6424. rxb->rx.nfa_states = 0;
  6425. rxb->rx.se_list_cmp = posix_se_list_order;
  6426. rxb->rx.start_set = 0;
  6427. ret = rx_compile (pattern, length, re_syntax_options, rxb);
  6428. alloca (0);
  6429. return rx_error_msg[(int) ret];
  6430. }
  6431. #ifdef __STDC__
  6432. int
  6433. re_compile_fastmap (struct re_pattern_buffer * rxb)
  6434. #else
  6435. int
  6436. re_compile_fastmap (rxb)
  6437. struct re_pattern_buffer * rxb;
  6438. #endif
  6439. {
  6440. rx_blow_up_fastmap (rxb);
  6441. return 0;
  6442. }
  6443. /* Entry points compatible with 4.2 BSD regex library. We don't define
  6444. them if this is an Emacs or POSIX compilation. */
  6445. #if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX)
  6446. /* BSD has one and only one pattern buffer. */
  6447. static struct re_pattern_buffer rx_comp_buf;
  6448. #ifdef __STDC__
  6449. char *
  6450. re_comp (__const__ char *s)
  6451. #else
  6452. char *
  6453. re_comp (s)
  6454. __const__ char *s;
  6455. #endif
  6456. {
  6457. reg_errcode_t ret;
  6458. if (!s || (*s == '\0'))
  6459. {
  6460. if (!rx_comp_buf.buffer)
  6461. return "No previous regular expression";
  6462. return 0;
  6463. }
  6464. if (!rx_comp_buf.fastmap)
  6465. {
  6466. rx_comp_buf.fastmap = (char *) malloc (1 << CHARBITS);
  6467. if (!rx_comp_buf.fastmap)
  6468. return "Memory exhausted";
  6469. }
  6470. /* Since `rx_exec' always passes NULL for the `regs' argument, we
  6471. don't need to initialize the pattern buffer fields which affect it. */
  6472. /* Match anchors at newlines. */
  6473. rx_comp_buf.newline_anchor = 1;
  6474. rx_comp_buf.re_nsub = 0;
  6475. rx_comp_buf.start = 0;
  6476. rx_comp_buf.se_params = 0;
  6477. rx_comp_buf.rx.nodec = 0;
  6478. rx_comp_buf.rx.epsnodec = 0;
  6479. rx_comp_buf.rx.instruction_table = 0;
  6480. rx_comp_buf.rx.nfa_states = 0;
  6481. rx_comp_buf.rx.start = 0;
  6482. rx_comp_buf.rx.se_list_cmp = posix_se_list_order;
  6483. rx_comp_buf.rx.start_set = 0;
  6484. rx_comp_buf.rx.local_cset_size = 256;
  6485. ret = rx_compile (s, strlen (s), re_syntax_options, &rx_comp_buf);
  6486. alloca (0);
  6487. /* Yes, we're discarding `__const__' here. */
  6488. return (char *) rx_error_msg[(int) ret];
  6489. }
  6490. #ifdef __STDC__
  6491. int
  6492. re_exec (__const__ char *s)
  6493. #else
  6494. int
  6495. re_exec (s)
  6496. __const__ char *s;
  6497. #endif
  6498. {
  6499. __const__ int len = strlen (s);
  6500. return
  6501. 0 <= re_search (&rx_comp_buf, s, len, 0, len, (struct re_registers *) 0);
  6502. }
  6503. #endif /* not emacs and not _POSIX_SOURCE */
  6504. /* POSIX.2 functions. Don't define these for Emacs. */
  6505. #if !defined(emacs)
  6506. /* regcomp takes a regular expression as a string and compiles it.
  6507. PREG is a regex_t *. We do not expect any fields to be initialized,
  6508. since POSIX says we shouldn't. Thus, we set
  6509. `buffer' to the compiled pattern;
  6510. `used' to the length of the compiled pattern;
  6511. `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
  6512. REG_EXTENDED bit in CFLAGS is set; otherwise, to
  6513. RE_SYNTAX_POSIX_BASIC;
  6514. `newline_anchor' to REG_NEWLINE being set in CFLAGS;
  6515. `fastmap' and `fastmap_accurate' to zero;
  6516. `re_nsub' to the number of subexpressions in PATTERN.
  6517. PATTERN is the address of the pattern string.
  6518. CFLAGS is a series of bits which affect compilation.
  6519. If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
  6520. use POSIX basic syntax.
  6521. If REG_NEWLINE is set, then . and [^...] don't match newline.
  6522. Also, regexec will try a match beginning after every newline.
  6523. If REG_ICASE is set, then we considers upper- and lowercase
  6524. versions of letters to be equivalent when matching.
  6525. If REG_NOSUB is set, then when PREG is passed to regexec, that
  6526. routine will report only success or failure, and nothing about the
  6527. registers.
  6528. It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
  6529. the return codes and their meanings.) */
  6530. #ifdef __STDC__
  6531. int
  6532. regcomp (regex_t * preg, __const__ char * pattern, int cflags)
  6533. #else
  6534. int
  6535. regcomp (preg, pattern, cflags)
  6536. regex_t * preg;
  6537. __const__ char * pattern;
  6538. int cflags;
  6539. #endif
  6540. {
  6541. reg_errcode_t ret;
  6542. unsigned syntax
  6543. = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
  6544. /* regex_compile will allocate the space for the compiled pattern. */
  6545. preg->buffer = 0;
  6546. preg->allocated = 0;
  6547. preg->fastmap = malloc (256);
  6548. if (!preg->fastmap)
  6549. return REG_ESPACE;
  6550. preg->fastmap_accurate = 0;
  6551. if (cflags & REG_ICASE)
  6552. {
  6553. unsigned i;
  6554. preg->translate = (unsigned char *) malloc (256);
  6555. if (!preg->translate)
  6556. return (int) REG_ESPACE;
  6557. /* Map uppercase characters to corresponding lowercase ones. */
  6558. for (i = 0; i < CHAR_SET_SIZE; i++)
  6559. preg->translate[i] = isupper (i) ? tolower (i) : i;
  6560. }
  6561. else
  6562. preg->translate = 0;
  6563. /* If REG_NEWLINE is set, newlines are treated differently. */
  6564. if (cflags & REG_NEWLINE)
  6565. { /* REG_NEWLINE implies neither . nor [^...] match newline. */
  6566. syntax &= ~RE_DOT_NEWLINE;
  6567. syntax |= RE_HAT_LISTS_NOT_NEWLINE;
  6568. /* It also changes the matching behavior. */
  6569. preg->newline_anchor = 1;
  6570. }
  6571. else
  6572. preg->newline_anchor = 0;
  6573. preg->no_sub = !!(cflags & REG_NOSUB);
  6574. /* POSIX says a null character in the pattern terminates it, so we
  6575. can use strlen here in compiling the pattern. */
  6576. preg->re_nsub = 0;
  6577. preg->start = 0;
  6578. preg->se_params = 0;
  6579. preg->syntax_parens = 0;
  6580. preg->rx.nodec = 0;
  6581. preg->rx.epsnodec = 0;
  6582. preg->rx.instruction_table = 0;
  6583. preg->rx.nfa_states = 0;
  6584. preg->rx.local_cset_size = 256;
  6585. preg->rx.start = 0;
  6586. preg->rx.se_list_cmp = posix_se_list_order;
  6587. preg->rx.start_set = 0;
  6588. ret = rx_compile (pattern, strlen (pattern), syntax, preg);
  6589. alloca (0);
  6590. /* POSIX doesn't distinguish between an unmatched open-group and an
  6591. unmatched close-group: both are REG_EPAREN. */
  6592. if (ret == REG_ERPAREN) ret = REG_EPAREN;
  6593. return (int) ret;
  6594. }
  6595. /* regexec searches for a given pattern, specified by PREG, in the
  6596. string STRING.
  6597. If NMATCH is zero or REG_NOSUB was set in the cflags argument to
  6598. `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
  6599. least NMATCH elements, and we set them to the offsets of the
  6600. corresponding matched substrings.
  6601. EFLAGS specifies `execution flags' which affect matching: if
  6602. REG_NOTBOL is set, then ^ does not match at the beginning of the
  6603. string; if REG_NOTEOL is set, then $ does not match at the end.
  6604. We return 0 if we find a match and REG_NOMATCH if not. */
  6605. #ifdef __STDC__
  6606. int
  6607. regexec (__const__ regex_t *preg, __const__ char *string,
  6608. size_t nmatch, regmatch_t pmatch[],
  6609. int eflags)
  6610. #else
  6611. int
  6612. regexec (preg, string, nmatch, pmatch, eflags)
  6613. __const__ regex_t *preg;
  6614. __const__ char *string;
  6615. size_t nmatch;
  6616. regmatch_t pmatch[];
  6617. int eflags;
  6618. #endif
  6619. {
  6620. int ret;
  6621. struct re_registers regs;
  6622. regex_t private_preg;
  6623. int len = strlen (string);
  6624. boolean want_reg_info = !preg->no_sub && nmatch > 0;
  6625. private_preg = *preg;
  6626. private_preg.not_bol = !!(eflags & REG_NOTBOL);
  6627. private_preg.not_eol = !!(eflags & REG_NOTEOL);
  6628. /* The user has told us exactly how many registers to return
  6629. * information about, via `nmatch'. We have to pass that on to the
  6630. * matching routines.
  6631. */
  6632. private_preg.regs_allocated = REGS_FIXED;
  6633. if (want_reg_info)
  6634. {
  6635. regs.num_regs = nmatch;
  6636. regs.start = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
  6637. regs.end = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
  6638. if (regs.start == 0 || regs.end == 0)
  6639. return (int) REG_NOMATCH;
  6640. }
  6641. /* Perform the searching operation. */
  6642. ret = re_search (&private_preg,
  6643. string, len,
  6644. /* start: */ 0,
  6645. /* range: */ len,
  6646. want_reg_info ? &regs : (struct re_registers *) 0);
  6647. /* Copy the register information to the POSIX structure. */
  6648. if (want_reg_info)
  6649. {
  6650. if (ret >= 0)
  6651. {
  6652. unsigned r;
  6653. for (r = 0; r < nmatch; r++)
  6654. {
  6655. pmatch[r].rm_so = regs.start[r];
  6656. pmatch[r].rm_eo = regs.end[r];
  6657. }
  6658. }
  6659. /* If we needed the temporary register info, free the space now. */
  6660. free (regs.start);
  6661. free (regs.end);
  6662. }
  6663. /* We want zero return to mean success, unlike `re_search'. */
  6664. return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
  6665. }
  6666. /* Returns a message corresponding to an error code, ERRCODE, returned
  6667. from either regcomp or regexec. */
  6668. #ifdef __STDC__
  6669. size_t
  6670. regerror (int errcode, __const__ regex_t *preg,
  6671. char *errbuf, size_t errbuf_size)
  6672. #else
  6673. size_t
  6674. regerror (errcode, preg, errbuf, errbuf_size)
  6675. int errcode;
  6676. __const__ regex_t *preg;
  6677. char *errbuf;
  6678. size_t errbuf_size;
  6679. #endif
  6680. {
  6681. __const__ char *msg
  6682. = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
  6683. size_t msg_size = strlen (msg) + 1; /* Includes the 0. */
  6684. if (errbuf_size != 0)
  6685. {
  6686. if (msg_size > errbuf_size)
  6687. {
  6688. strncpy (errbuf, msg, errbuf_size - 1);
  6689. errbuf[errbuf_size - 1] = 0;
  6690. }
  6691. else
  6692. strcpy (errbuf, msg);
  6693. }
  6694. return msg_size;
  6695. }
  6696. /* Free dynamically allocated space used by PREG. */
  6697. #ifdef __STDC__
  6698. void
  6699. regfree (regex_t *preg)
  6700. #else
  6701. void
  6702. regfree (preg)
  6703. regex_t *preg;
  6704. #endif
  6705. {
  6706. if (preg->buffer != 0)
  6707. free (preg->buffer);
  6708. preg->buffer = 0;
  6709. preg->allocated = 0;
  6710. if (preg->fastmap != 0)
  6711. free (preg->fastmap);
  6712. preg->fastmap = 0;
  6713. preg->fastmap_accurate = 0;
  6714. if (preg->translate != 0)
  6715. free (preg->translate);
  6716. preg->translate = 0;
  6717. }
  6718. #endif /* not emacs */