
patch-realtime 871 KB

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072
070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979209802098120982209832098420985209862098720988209892099020991209922099320994209952099620997209982099921000210012100221003210042100521006210072100821009210102101121012210132101421015210162101721018210192102021021210222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182
141921420214212142221423214242142521426214272142821429214302143121432214332143421435214362143721438214392144021441214422144321444214452144621447214482144921450214512145221453214542145521456214572145821459214602146121462214632146421465214662146721468214692147021471214722147321474214752147621477214782147921480214812148221483214842148521486214872148821489214902149121492214932149421495214962149721498214992150021501215022150321504215052150621507215082150921510215112151221513215142151521516215172151821519215202152121522215232152421525215262152721528215292153021531215322153321534215352153621537215382153921540215412154221543215442154521546215472154821549215502155121552215532155421555215562155721558215592156021561215622156321564215652156621567215682156921570215712157221573215742157521576215772157821579215802158121582215832158421585215862158721588215892159021591215922159321594215952159621597215982159921600216012160221603216042160521606216072160821609216102161121612216132161421615216162161721618216192162021621216222162321624216252162621627216282162921630216312163221633216342163521636216372163821639216402164121642216432164421645216462164721648216492165021651216522165321654216552165621657216582165921660216612166221663216642166521666216672166821669216702167121672216732167421675216762167721678216792168021681216822168321684216852168621687216882168921690216912169221693216942169521696216972169821699217002170121702217032170421705217062170721708217092171021711217122171321714217152171621717217182171921720217212172221723217242172521726217272172821729217302173121732217332173421735217362173721738217392174021741217422174321744217452174621747217482174921750217512175221753217542175521756217572175821759217602176121762217632176421765217662176721768217692177021771217722177321774217752177621777217782177921780217812178221783217842178521786217872178821789217902179121792217932179421795217962179721798217992180021801218022180321804218052180621807218082180921810218112181221813218142181521816218172181821819218202182121822218232182421825218262182721828218292183021831218322183321834218352183621837218382183921840218412184221843218442184521846218472184821849218502185121852218532185421855218562185721858218592186021861218622186321864218652186621867218682186921870218712187221873218742187521876218772187821879218802188121882218832188421885218862188721888218892189021891218922189321894218952189621897218982189921900219012190221903219042190521906219072190821909219102191121912219132191421915219162191721918219192192021921219222192321924219252192621927219282192921930219312193221933219342193521936219372193821939219402194121942219432194421945219462194721948219492195021951219522195321954219552195621957219582195921960219612196221963219642196521966219672196821969219702197121972219732197421975219762197721978219792198021981219822198321984219852198621987219882198921990219912199221993219942199521996219972199821999220002200122002220032200422005220062200722008220092201022011220122201322014220152201622017220182201922020220212202222023220242202522026220272202822029220302203122032220332203422035220362203722038220392204022041220422204322044220452204622047220482204922050220512205222053220542205522056220572205822059220602206122062220632206422065220662206722068220692207022071220722207322074220752207622077220782207922080220812208222083220842208522086220872208822089220902209122092220932209422095220962209722098220992210022101221022210322104221052210622107221082210922110221112211222113221142211522116221172211822119221202212122122221232212422125221262212722128221292
213022131221322213322134221352213622137221382213922140221412214222143221442214522146221472214822149221502215122152221532215422155221562215722158221592216022161221622216322164221652216622167221682216922170221712217222173221742217522176221772217822179221802218122182221832218422185221862218722188221892219022191221922219322194221952219622197221982219922200222012220222203222042220522206222072220822209222102221122212222132221422215222162221722218222192222022221222222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212262222623226242262522626226272262822629226302263122632226332263422635226362263722638226392264022641226422264322644226452264622647226482264922650226512265222653226542265522656226572265822659226602266122662226632266422665226662266722668226692267022671226722267322674226752267622677226782267922680226812268222683226842268522686226872268822689226902269122692226932269422695226962269722698226992270022701227022270322704227052270622707227082270922710227112271222713227142271522716227172271822719227202272122722227232272422725227262272722728227292273022731227322273322734227352273622737227382273922740227412274222743227442274522746227472274822749227502275122752227532275422755227562275722758227592276022761227622276322764227652276622767227682276922770227712277222773227742277522776227772277822779227802278122782227832278422785227862278722788227892279022791227922279322794227952279622797227982279922800228012280222803228042280522806228072280822809228102281122812228132281422815228162281722818228192282022821228222282322824228252282622827228282282922830228312283222833228342283522836228372283822839228402
284122842228432284422845228462284722848228492285022851228522285322854228552285622857228582285922860228612286222863228642286522866228672286822869228702287122872228732287422875228762287722878228792288022881228822288322884228852288622887228882288922890228912289222893228942289522896228972289822899229002290122902229032290422905229062290722908229092291022911229122291322914229152291622917229182291922920229212292222923229242292522926229272292822929229302293122932229332293422935229362293722938229392294022941229422294322944229452294622947229482294922950229512295222953229542295522956229572295822959229602296122962229632296422965229662296722968229692297022971229722297322974229752297622977229782297922980229812298222983229842298522986229872298822989229902299122992229932299422995229962299722998229992300023001230022300323004230052300623007230082300923010230112301223013230142301523016230172301823019230202302123022230232302423025230262302723028230292303023031230322303323034230352303623037230382303923040230412304223043230442304523046230472304823049230502305123052230532305423055230562305723058230592306023061230622306323064230652306623067230682306923070230712307223073230742307523076230772307823079230802308123082230832308423085230862308723088230892309023091230922309323094230952309623097230982309923100231012310223103231042310523106231072310823109231102311123112231132311423115231162311723118231192312023121231222312323124231252312623127231282312923130231312313223133231342313523136231372313823139231402314123142231432314423145231462314723148231492315023151231522315323154231552315623157231582315923160231612316223163231642316523166231672316823169231702317123172231732317423175231762317723178231792318023181231822318323184231852318623187231882318923190231912319223193231942319523196231972319823199232002320123202232032320423205232062320723208232092321023211232122321323214232152321623217232182321923220232212322223223232242322523226232272322823229232302323123232232332323423235232362323723238232392324023241232422324323244232452324623247232482324923250232512325223253232542325523256232572325823259232602326123262232632326423265232662326723268232692327023271232722327323274232752327623277232782327923280232812328223283232842328523286232872328823289232902329123292232932329423295232962329723298232992330023301233022330323304233052330623307233082330923310233112331223313233142331523316233172331823319233202332123322233232332423325233262332723328233292333023331233322333323334233352333623337233382333923340233412334223343233442334523346233472334823349233502335123352233532335423355233562335723358233592336023361233622336323364233652336623367233682336923370233712337223373233742337523376233772337823379233802338123382233832338423385233862338723388233892339023391233922339323394233952339623397233982339923400234012340223403234042340523406234072340823409234102341123412234132341423415234162341723418234192342023421234222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512
355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212382223823238242382523826238272382823829238302383123832238332383423835238362383723838238392384023841238422384323844238452384623847238482384923850238512385223853238542385523856238572385823859238602386123862238632386423865238662386723868238692387023871238722387323874238752387623877238782387923880238812388223883238842388523886238872388823889238902389123892238932389423895238962389723898238992390023901239022390323904239052390623907239082390923910239112391223913239142391523916239172391823919239202392123922239232392423925239262392723928239292393023931239322393323934239352393623937239382393923940239412394223943239442394523946239472394823949239502395123952239532395423955239562395723958239592396023961239622396323964239652396623967239682396923970239712397223973239742397523976239772397823979239802398123982239832398423985239862398723988239892399023991239922399323994239952399623997239982399924000240012400224003240042400524006240072400824009240102401124012240132401424015240162401724018240192402024021240222402324024240252402624027240282402924030240312403224033240342403524036240372403824039240402404124042240432404424045240462404724048240492405024051240522405324054240552405624057240582405924060240612406224063240642406524066240672406824069240702407124072240732407424075240762407724078240792408024081240822408324084240852408624087240882408924090240912409224093240942409524096240972409824099241002410124102241032410424105241062410724108241092411024111241122411324114241152411624117241182411924120241212412224123241242412524126241272412824129241302413124132241332413424135241362413724138241392414024141241422414324144241452414624147241482414924150241512415224153241542415524156241572415824159241602416124162241632416424165241662416724168241692417024171241722417324174241752417624177241782417924180241812418224183241842418524186241872418824189241902419124192241932419424195241962419724198241992420024201242022420324204242052420624207242082420924210242112421224213242142421524216242172421824219242202422124222242232422424225242262422724228242292423024231242322423324234242352423624237242382423924240242412424224243242442424524246242472424824249242502425124252242532425424255242562425724258242592426024261242622
diff -Nur linux-4.1.39.orig/arch/alpha/mm/fault.c linux-4.1.39/arch/alpha/mm/fault.c
--- linux-4.1.39.orig/arch/alpha/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/alpha/mm/fault.c 2017-04-18 17:56:30.549394649 +0200
@@ -23,8 +23,7 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/module.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
@@ -107,7 +106,7 @@
/* If we're in an interrupt context, or have no user context,
we must not take the fault. */
- if (!mm || in_atomic())
+ if (!mm || faulthandler_disabled())
goto no_context;
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
diff -Nur linux-4.1.39.orig/arch/arc/include/asm/futex.h linux-4.1.39/arch/arc/include/asm/futex.h
--- linux-4.1.39.orig/arch/arc/include/asm/futex.h 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arc/include/asm/futex.h 2017-04-18 17:56:30.549394649 +0200
@@ -53,7 +53,7 @@
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -75,7 +75,7 @@
ret = -ENOSYS;
}
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
if (!ret) {
switch (cmp) {
@@ -104,7 +104,7 @@
return ret;
}
-/* Compare-xchg with preemption disabled.
+/* Compare-xchg with pagefaults disabled.
* Notes:
* -Best-Effort: Exchg happens only if compare succeeds.
* If compare fails, returns; leaving retry/looping to upper layers
@@ -121,7 +121,7 @@
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();
/* TBD : can use llock/scond */
__asm__ __volatile__(
@@ -142,7 +142,7 @@
: "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
: "cc", "memory");
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
*uval = val;
return val;
diff -Nur linux-4.1.39.orig/arch/arc/mm/fault.c linux-4.1.39/arch/arc/mm/fault.c
--- linux-4.1.39.orig/arch/arc/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arc/mm/fault.c 2017-04-18 17:56:30.549394649 +0200
@@ -86,7 +86,7 @@
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff -Nur linux-4.1.39.orig/arch/arm/include/asm/cmpxchg.h linux-4.1.39/arch/arm/include/asm/cmpxchg.h
--- linux-4.1.39.orig/arch/arm/include/asm/cmpxchg.h 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/include/asm/cmpxchg.h 2017-04-18 17:56:30.549394649 +0200
@@ -129,6 +129,8 @@
#else /* min ARCH >= ARMv6 */
+#define __HAVE_ARCH_CMPXCHG 1
+
extern void __bad_cmpxchg(volatile void *ptr, int size);
/*
diff -Nur linux-4.1.39.orig/arch/arm/include/asm/futex.h linux-4.1.39/arch/arm/include/asm/futex.h
--- linux-4.1.39.orig/arch/arm/include/asm/futex.h 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/include/asm/futex.h 2017-04-18 17:56:30.549394649 +0200
@@ -93,6 +93,7 @@
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
+ preempt_disable();
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
"1: " TUSER(ldr) " %1, [%4]\n"
" teq %1, %2\n"
@@ -104,6 +105,8 @@
: "cc", "memory");
*uval = val;
+ preempt_enable();
+
return ret;
}
@@ -124,7 +127,10 @@
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
+#ifndef CONFIG_SMP
+ preempt_disable();
+#endif
+ pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -146,7 +152,10 @@
ret = -ENOSYS;
}
- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
+#ifndef CONFIG_SMP
+ preempt_enable();
+#endif
if (!ret) {
switch (cmp) {
diff -Nur linux-4.1.39.orig/arch/arm/include/asm/switch_to.h linux-4.1.39/arch/arm/include/asm/switch_to.h
--- linux-4.1.39.orig/arch/arm/include/asm/switch_to.h 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/include/asm/switch_to.h 2017-04-18 17:56:30.549394649 +0200
@@ -3,6 +3,13 @@
#include <linux/thread_info.h>
+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
/*
* For v7 SMP cores running a preemptible kernel we may be pre-empted
* during a TLB maintenance operation, so execute an inner-shareable dsb
@@ -22,6 +29,7 @@
#define switch_to(prev,next,last) \
do { \
+ switch_kmaps(prev, next); \
last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
} while (0)
diff -Nur linux-4.1.39.orig/arch/arm/include/asm/thread_info.h linux-4.1.39/arch/arm/include/asm/thread_info.h
--- linux-4.1.39.orig/arch/arm/include/asm/thread_info.h 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/include/asm/thread_info.h 2017-04-18 17:56:30.549394649 +0200
@@ -50,6 +50,7 @@
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
__u32 cpu; /* cpu */
@@ -147,6 +148,7 @@
#define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+#define TIF_NEED_RESCHED_LAZY 3
#define TIF_UPROBE 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -160,6 +162,7 @@
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
diff -Nur linux-4.1.39.orig/arch/arm/Kconfig linux-4.1.39/arch/arm/Kconfig
--- linux-4.1.39.orig/arch/arm/Kconfig 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/Kconfig 2017-04-18 17:56:30.549394649 +0200
@@ -31,7 +31,7 @@
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
+ select HAVE_ARCH_JUMP_LABEL if (!XIP_KERNEL && !PREEMPT_RT_BASE)
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
@@ -66,6 +66,7 @@
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_SYSCALL_TRACEPOINTS
diff -Nur linux-4.1.39.orig/arch/arm/kernel/asm-offsets.c linux-4.1.39/arch/arm/kernel/asm-offsets.c
--- linux-4.1.39.orig/arch/arm/kernel/asm-offsets.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kernel/asm-offsets.c 2017-04-18 17:56:30.549394649 +0200
@@ -65,6 +65,7 @@
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
diff -Nur linux-4.1.39.orig/arch/arm/kernel/entry-armv.S linux-4.1.39/arch/arm/kernel/entry-armv.S
--- linux-4.1.39.orig/arch/arm/kernel/entry-armv.S 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kernel/entry-armv.S 2017-04-18 17:56:30.549394649 +0200
@@ -208,11 +208,18 @@
#ifdef CONFIG_PREEMPT
get_thread_info tsk
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exeption
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY
blne svc_preempt
+1:
#endif
svc_exit r5, irq = 1 @ return from exception
@@ -227,8 +234,14 @@
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
- b 1b
+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r0, #0 @ if preempt lazy count != 0
+ beq 1b
+ ret r8 @ go again
+
#endif
__und_fault:
diff -Nur linux-4.1.39.orig/arch/arm/kernel/patch.c linux-4.1.39/arch/arm/kernel/patch.c
--- linux-4.1.39.orig/arch/arm/kernel/patch.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kernel/patch.c 2017-04-18 17:56:30.549394649 +0200
@@ -15,7 +15,7 @@
unsigned int insn;
};
-static DEFINE_SPINLOCK(patch_lock);
+static DEFINE_RAW_SPINLOCK(patch_lock);
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
__acquires(&patch_lock)
@@ -32,7 +32,7 @@
return addr;
if (flags)
- spin_lock_irqsave(&patch_lock, *flags);
+ raw_spin_lock_irqsave(&patch_lock, *flags);
else
__acquire(&patch_lock);
@@ -47,7 +47,7 @@
clear_fixmap(fixmap);
if (flags)
- spin_unlock_irqrestore(&patch_lock, *flags);
+ raw_spin_unlock_irqrestore(&patch_lock, *flags);
else
__release(&patch_lock);
}
diff -Nur linux-4.1.39.orig/arch/arm/kernel/process.c linux-4.1.39/arch/arm/kernel/process.c
--- linux-4.1.39.orig/arch/arm/kernel/process.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kernel/process.c 2017-04-18 17:56:30.549394649 +0200
@@ -290,6 +290,30 @@
}
#ifdef CONFIG_MMU
+/*
+ * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not
+ * initialized by pgtable_page_ctor() then a coredump of the vector page will
+ * fail.
+ */
+static int __init vectors_user_mapping_init_page(void)
+{
+ struct page *page;
+ unsigned long addr = 0xffff0000;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset_k(addr);
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ page = pmd_page(*(pmd));
+
+ pgtable_page_ctor(page);
+
+ return 0;
+}
+late_initcall(vectors_user_mapping_init_page);
+
#ifdef CONFIG_KUSER_HELPERS
/*
* The vectors page is always readable from user space for the
diff -Nur linux-4.1.39.orig/arch/arm/kernel/signal.c linux-4.1.39/arch/arm/kernel/signal.c
--- linux-4.1.39.orig/arch/arm/kernel/signal.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kernel/signal.c 2017-04-18 17:56:30.549394649 +0200
@@ -568,7 +568,8 @@
do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
diff -Nur linux-4.1.39.orig/arch/arm/kernel/smp.c linux-4.1.39/arch/arm/kernel/smp.c
--- linux-4.1.39.orig/arch/arm/kernel/smp.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kernel/smp.c 2017-04-18 17:56:30.549394649 +0200
@@ -213,8 +213,6 @@
flush_cache_louis();
local_flush_tlb_all();
- clear_tasks_mm_cpumask(cpu);
-
return 0;
}
@@ -230,6 +228,9 @@
pr_err("CPU%u: cpu didn't die\n", cpu);
return;
}
+
+ clear_tasks_mm_cpumask(cpu);
+
pr_notice("CPU%u: shutdown\n", cpu);
/*
diff -Nur linux-4.1.39.orig/arch/arm/kernel/unwind.c linux-4.1.39/arch/arm/kernel/unwind.c
--- linux-4.1.39.orig/arch/arm/kernel/unwind.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kernel/unwind.c 2017-04-18 17:56:30.549394649 +0200
@@ -93,7 +93,7 @@
static const struct unwind_idx *__origin_unwind_idx;
extern const struct unwind_idx __stop_unwind_idx[];
-static DEFINE_SPINLOCK(unwind_lock);
+static DEFINE_RAW_SPINLOCK(unwind_lock);
static LIST_HEAD(unwind_tables);
/* Convert a prel31 symbol to an absolute address */
@@ -201,7 +201,7 @@
/* module unwind tables */
struct unwind_table *table;
- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_for_each_entry(table, &unwind_tables, list) {
if (addr >= table->begin_addr &&
addr < table->end_addr) {
@@ -213,7 +213,7 @@
break;
}
}
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
}
pr_debug("%s: idx = %p\n", __func__, idx);
@@ -529,9 +529,9 @@
tab->begin_addr = text_addr;
tab->end_addr = text_addr + text_size;
- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_add_tail(&tab->list, &unwind_tables);
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
return tab;
}
@@ -543,9 +543,9 @@
if (!tab)
return;
- spin_lock_irqsave(&unwind_lock, flags);
+ raw_spin_lock_irqsave(&unwind_lock, flags);
list_del(&tab->list);
- spin_unlock_irqrestore(&unwind_lock, flags);
+ raw_spin_unlock_irqrestore(&unwind_lock, flags);
kfree(tab);
}
diff -Nur linux-4.1.39.orig/arch/arm/kvm/arm.c linux-4.1.39/arch/arm/kvm/arm.c
--- linux-4.1.39.orig/arch/arm/kvm/arm.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kvm/arm.c 2017-04-18 17:56:30.549394649 +0200
@@ -473,9 +473,9 @@
static void vcpu_pause(struct kvm_vcpu *vcpu)
{
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_head *wq = kvm_arch_vcpu_wq(vcpu);
- wait_event_interruptible(*wq, !vcpu->arch.pause);
+ swait_event_interruptible(*wq, !vcpu->arch.pause);
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
diff -Nur linux-4.1.39.orig/arch/arm/kvm/psci.c linux-4.1.39/arch/arm/kvm/psci.c
--- linux-4.1.39.orig/arch/arm/kvm/psci.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/kvm/psci.c 2017-04-18 17:56:30.549394649 +0200
@@ -68,7 +68,7 @@
{
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
- wait_queue_head_t *wq;
+ struct swait_head *wq;
unsigned long cpu_id;
unsigned long context_id;
phys_addr_t target_pc;
@@ -117,7 +117,7 @@
smp_mb(); /* Make sure the above is visible */
wq = kvm_arch_vcpu_wq(vcpu);
- wake_up_interruptible(wq);
+ swait_wake_interruptible(wq);
return PSCI_RET_SUCCESS;
}
diff -Nur linux-4.1.39.orig/arch/arm/mach-at91/at91rm9200.c linux-4.1.39/arch/arm/mach-at91/at91rm9200.c
--- linux-4.1.39.orig/arch/arm/mach-at91/at91rm9200.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-at91/at91rm9200.c 2017-04-18 17:56:30.549394649 +0200
@@ -13,7 +13,6 @@
#include <linux/of_platform.h>
#include <asm/mach/arch.h>
-#include <asm/system_misc.h>
#include "generic.h"
#include "soc.h"
@@ -34,7 +33,6 @@
of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
- arm_pm_idle = at91rm9200_idle;
at91rm9200_pm_init();
}
diff -Nur linux-4.1.39.orig/arch/arm/mach-at91/at91sam9.c linux-4.1.39/arch/arm/mach-at91/at91sam9.c
--- linux-4.1.39.orig/arch/arm/mach-at91/at91sam9.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-at91/at91sam9.c 2017-04-18 17:56:30.549394649 +0200
@@ -62,8 +62,6 @@
soc_dev = soc_device_to_device(soc);
of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
-
- arm_pm_idle = at91sam9_idle;
}
static void __init at91sam9_dt_device_init(void)
diff -Nur linux-4.1.39.orig/arch/arm/mach-at91/generic.h linux-4.1.39/arch/arm/mach-at91/generic.h
--- linux-4.1.39.orig/arch/arm/mach-at91/generic.h 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-at91/generic.h 2017-04-18 17:56:30.549394649 +0200
@@ -11,27 +11,18 @@
#ifndef _AT91_GENERIC_H
#define _AT91_GENERIC_H
-#include <linux/of.h>
-#include <linux/reboot.h>
-
- /* Map io */
-extern void __init at91_map_io(void);
-extern void __init at91_alt_map_io(void);
-
-/* idle */
-extern void at91rm9200_idle(void);
-extern void at91sam9_idle(void);
-
#ifdef CONFIG_PM
extern void __init at91rm9200_pm_init(void);
extern void __init at91sam9260_pm_init(void);
extern void __init at91sam9g45_pm_init(void);
extern void __init at91sam9x5_pm_init(void);
+extern void __init sama5_pm_init(void);
#else
static inline void __init at91rm9200_pm_init(void) { }
static inline void __init at91sam9260_pm_init(void) { }
static inline void __init at91sam9g45_pm_init(void) { }
static inline void __init at91sam9x5_pm_init(void) { }
+static inline void __init sama5_pm_init(void) { }
#endif
#endif /* _AT91_GENERIC_H */
diff -Nur linux-4.1.39.orig/arch/arm/mach-at91/pm.c linux-4.1.39/arch/arm/mach-at91/pm.c
--- linux-4.1.39.orig/arch/arm/mach-at91/pm.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-at91/pm.c 2017-04-18 17:56:30.549394649 +0200
@@ -31,10 +31,13 @@
#include <asm/mach/irq.h>
#include <asm/fncpy.h>
#include <asm/cacheflush.h>
+#include <asm/system_misc.h>
#include "generic.h"
#include "pm.h"
+static void __iomem *pmc;
+
/*
* FIXME: this is needed to communicate between the pinctrl driver and
* the PM implementation in the machine. Possibly part of the PM
@@ -85,7 +88,7 @@
unsigned long scsr;
int i;
- scsr = at91_pmc_read(AT91_PMC_SCSR);
+ scsr = readl(pmc + AT91_PMC_SCSR);
/* USB must not be using PLLB */
if ((scsr & at91_pm_data.uhp_udp_mask) != 0) {
@@ -99,8 +102,7 @@
if ((scsr & (AT91_PMC_PCK0 << i)) == 0)
continue;
-
- css = at91_pmc_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
+ css = readl(pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
if (css != AT91_PMC_CSS_SLOW) {
pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
return 0;
@@ -143,8 +145,8 @@
flush_cache_all();
outer_disable();
- at91_suspend_sram_fn(at91_pmc_base, at91_ramc_base[0],
- at91_ramc_base[1], pm_data);
+ at91_suspend_sram_fn(pmc, at91_ramc_base[0],
+ at91_ramc_base[1], pm_data);
outer_resume();
}
@@ -348,6 +350,21 @@
at91_pm_set_standby(standby);
}
+void at91rm9200_idle(void)
+{
+ /*
+ * Disable the processor clock. The processor will be automatically
+ * re-enabled by an interrupt or by a reset.
+ */
+ writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
+}
+
+void at91sam9_idle(void)
+{
+ writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR);
+ cpu_do_idle();
+}
+
static void __init at91_pm_sram_init(void)
{
struct gen_pool *sram_pool;
@@ -394,13 +411,36 @@
&at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz);
}
-static void __init at91_pm_init(void)
+static const struct of_device_id atmel_pmc_ids[] __initconst = {
+ { .compatible = "atmel,at91rm9200-pmc" },
+ { .compatible = "atmel,at91sam9260-pmc" },
+ { .compatible = "atmel,at91sam9g45-pmc" },
+ { .compatible = "atmel,at91sam9n12-pmc" },
+ { .compatible = "atmel,at91sam9x5-pmc" },
+ { .compatible = "atmel,sama5d3-pmc" },
+ { .compatible = "atmel,sama5d2-pmc" },
+ { /* sentinel */ },
+};
+
+static void __init at91_pm_init(void (*pm_idle)(void))
{
- at91_pm_sram_init();
+ struct device_node *pmc_np;
if (at91_cpuidle_device.dev.platform_data)
platform_device_register(&at91_cpuidle_device);
+ pmc_np = of_find_matching_node(NULL, atmel_pmc_ids);
+ pmc = of_iomap(pmc_np, 0);
+ if (!pmc) {
+ pr_err("AT91: PM not supported, PMC not found\n");
+ return;
+ }
+
+ if (pm_idle)
+ arm_pm_idle = pm_idle;
+
+ at91_pm_sram_init();
+
if (at91_suspend_sram_fn)
suspend_set_ops(&at91_pm_ops);
else
@@ -419,7 +459,7 @@
at91_pm_data.uhp_udp_mask = AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP;
at91_pm_data.memctrl = AT91_MEMCTRL_MC;
- at91_pm_init();
+ at91_pm_init(at91rm9200_idle);
}
void __init at91sam9260_pm_init(void)
@@ -427,7 +467,7 @@
at91_dt_ramc();
at91_pm_data.memctrl = AT91_MEMCTRL_SDRAMC;
at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
- return at91_pm_init();
+ at91_pm_init(at91sam9_idle);
}
void __init at91sam9g45_pm_init(void)
@@ -435,7 +475,7 @@
at91_dt_ramc();
at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP;
at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
- return at91_pm_init();
+ at91_pm_init(at91sam9_idle);
}
void __init at91sam9x5_pm_init(void)
@@ -443,5 +483,13 @@
at91_dt_ramc();
at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
- return at91_pm_init();
+ at91_pm_init(at91sam9_idle);
+}
+
+void __init sama5_pm_init(void)
+{
+ at91_dt_ramc();
+ at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP;
+ at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR;
+ at91_pm_init(NULL);
}
diff -Nur linux-4.1.39.orig/arch/arm/mach-at91/sama5.c linux-4.1.39/arch/arm/mach-at91/sama5.c
--- linux-4.1.39.orig/arch/arm/mach-at91/sama5.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-at91/sama5.c 2017-04-18 17:56:30.549394649 +0200
@@ -49,7 +49,7 @@
soc_dev = soc_device_to_device(soc);
of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev);
- at91sam9x5_pm_init();
+ sama5_pm_init();
}
static const char *sama5_dt_board_compat[] __initconst = {
diff -Nur linux-4.1.39.orig/arch/arm/mach-exynos/platsmp.c linux-4.1.39/arch/arm/mach-exynos/platsmp.c
--- linux-4.1.39.orig/arch/arm/mach-exynos/platsmp.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-exynos/platsmp.c 2017-04-18 17:56:30.549394649 +0200
@@ -231,7 +231,7 @@
return (void __iomem *)(S5P_VA_SCU);
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static void exynos_secondary_init(unsigned int cpu)
{
@@ -244,8 +244,8 @@
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -259,7 +259,7 @@
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -286,7 +286,7 @@
if (timeout == 0) {
printk(KERN_ERR "cpu1 power enable failed");
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return -ETIMEDOUT;
}
}
@@ -342,7 +342,7 @@
* calibrations, then wait for it to finish
*/
fail:
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? ret : 0;
}
diff -Nur linux-4.1.39.orig/arch/arm/mach-hisi/platmcpm.c linux-4.1.39/arch/arm/mach-hisi/platmcpm.c
--- linux-4.1.39.orig/arch/arm/mach-hisi/platmcpm.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-hisi/platmcpm.c 2017-04-18 17:56:30.549394649 +0200
@@ -57,7 +57,7 @@
static void __iomem *sysctrl, *fabric;
static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static u32 fabric_phys_addr;
/*
* [0]: bootwrapper physical address
@@ -104,7 +104,7 @@
if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
return -EINVAL;
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
if (hip04_cpu_table[cluster][cpu])
goto out;
@@ -133,7 +133,7 @@
udelay(20);
out:
hip04_cpu_table[cluster][cpu]++;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return 0;
}
@@ -149,7 +149,7 @@
__mcpm_cpu_going_down(cpu, cluster);
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
hip04_cpu_table[cluster][cpu]--;
if (hip04_cpu_table[cluster][cpu] == 1) {
@@ -162,7 +162,7 @@
last_man = hip04_cluster_is_down(cluster);
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
/* Since it's Cortex A15, disable L2 prefetching. */
asm volatile(
"mcr p15, 1, %0, c15, c0, 3 \n\t"
@@ -173,7 +173,7 @@
hip04_set_snoop_filter(cluster, 0);
__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
} else {
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
v7_exit_coherency_flush(louis);
}
@@ -192,7 +192,7 @@
cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
count = TIMEOUT_MSEC / POLL_MSEC;
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
for (tries = 0; tries < count; tries++) {
if (hip04_cpu_table[cluster][cpu]) {
ret = -EBUSY;
@@ -202,10 +202,10 @@
data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
if (data & CORE_WFI_STATUS(cpu))
break;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
/* Wait for clean L2 when the whole cluster is down. */
msleep(POLL_MSEC);
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
}
if (tries >= count)
goto err;
@@ -220,10 +220,10 @@
}
if (tries >= count)
goto err;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return 0;
err:
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return ret;
}
@@ -235,10 +235,10 @@
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
if (!hip04_cpu_table[cluster][cpu])
hip04_cpu_table[cluster][cpu] = 1;
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level)
diff -Nur linux-4.1.39.orig/arch/arm/mach-omap2/gpio.c linux-4.1.39/arch/arm/mach-omap2/gpio.c
--- linux-4.1.39.orig/arch/arm/mach-omap2/gpio.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-omap2/gpio.c 2017-04-18 17:56:30.549394649 +0200
@@ -130,7 +130,6 @@
}
pwrdm = omap_hwmod_get_pwrdm(oh);
- pdata->loses_context = pwrdm_can_ever_lose_context(pwrdm);
pdev = omap_device_build(name, id - 1, oh, pdata, sizeof(*pdata));
kfree(pdata);
diff -Nur linux-4.1.39.orig/arch/arm/mach-omap2/omap-smp.c linux-4.1.39/arch/arm/mach-omap2/omap-smp.c
--- linux-4.1.39.orig/arch/arm/mach-omap2/omap-smp.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-omap2/omap-smp.c 2017-04-18 17:56:30.549394649 +0200
@@ -43,7 +43,7 @@
/* SCU base address */
static void __iomem *scu_base;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __iomem *omap4_get_scu_base(void)
{
@@ -74,8 +74,8 @@
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -89,7 +89,7 @@
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* Update the AuxCoreBoot0 with boot state for secondary core.
@@ -166,7 +166,7 @@
* Now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return 0;
}
diff -Nur linux-4.1.39.orig/arch/arm/mach-omap2/powerdomain.c linux-4.1.39/arch/arm/mach-omap2/powerdomain.c
--- linux-4.1.39.orig/arch/arm/mach-omap2/powerdomain.c 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-omap2/powerdomain.c 2017-04-18 17:56:30.553394804 +0200
@@ -1166,43 +1166,3 @@
return count;
}
-/**
- * pwrdm_can_ever_lose_context - can this powerdomain ever lose context?
- * @pwrdm: struct powerdomain *
- *
- * Given a struct powerdomain * @pwrdm, returns 1 if the powerdomain
- * can lose either memory or logic context or if @pwrdm is invalid, or
- * returns 0 otherwise. This function is not concerned with how the
- * powerdomain registers are programmed (i.e., to go off or not); it's
- * concerned with whether it's ever possible for this powerdomain to
- * go off while some other part of the chip is active. This function
- * assumes that every powerdomain can go to either ON or INACTIVE.
- */
-bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm)
-{
- int i;
-
- if (!pwrdm) {
- pr_debug("powerdomain: %s: invalid powerdomain pointer\n",
- __func__);
- return 1;
- }
-
- if (pwrdm->pwrsts & PWRSTS_OFF)
- return 1;
-
- if (pwrdm->pwrsts & PWRSTS_RET) {
- if (pwrdm->pwrsts_logic_ret & PWRSTS_OFF)
- return 1;
-
- for (i = 0; i < pwrdm->banks; i++)
- if (pwrdm->pwrsts_mem_ret[i] & PWRSTS_OFF)
- return 1;
- }
-
- for (i = 0; i < pwrdm->banks; i++)
- if (pwrdm->pwrsts_mem_on[i] & PWRSTS_OFF)
- return 1;
-
- return 0;
-}
diff -Nur linux-4.1.39.orig/arch/arm/mach-omap2/powerdomain.h linux-4.1.39/arch/arm/mach-omap2/powerdomain.h
--- linux-4.1.39.orig/arch/arm/mach-omap2/powerdomain.h 2017-03-13 21:04:36.000000000 +0100
+++ linux-4.1.39/arch/arm/mach-omap2/powerdomain.h 2017-04-18 17:56:30.553394804 +0200
@@ -244,7 +244,6 @@
int pwrdm_pre_transition(struct powerdomain *pwrdm);
int pwrdm_post_transition(struct powerdomain *pwrdm);
int pwrdm_get_context_loss_count(struct powerdomain *pwrdm);
  834. -bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm);
  835. extern int omap_set_pwrdm_state(struct powerdomain *pwrdm, u8 state);
  836. diff -Nur linux-4.1.39.orig/arch/arm/mach-prima2/platsmp.c linux-4.1.39/arch/arm/mach-prima2/platsmp.c
  837. --- linux-4.1.39.orig/arch/arm/mach-prima2/platsmp.c 2017-03-13 21:04:36.000000000 +0100
  838. +++ linux-4.1.39/arch/arm/mach-prima2/platsmp.c 2017-04-18 17:56:30.553394804 +0200
  839. @@ -22,7 +22,7 @@
  840. static void __iomem *clk_base;
  841. -static DEFINE_SPINLOCK(boot_lock);
  842. +static DEFINE_RAW_SPINLOCK(boot_lock);
  843. static void sirfsoc_secondary_init(unsigned int cpu)
  844. {
  845. @@ -36,8 +36,8 @@
  846. /*
  847. * Synchronise with the boot thread.
  848. */
  849. - spin_lock(&boot_lock);
  850. - spin_unlock(&boot_lock);
  851. + raw_spin_lock(&boot_lock);
  852. + raw_spin_unlock(&boot_lock);
  853. }
  854. static const struct of_device_id clk_ids[] = {
  855. @@ -75,7 +75,7 @@
  856. /* make sure write buffer is drained */
  857. mb();
  858. - spin_lock(&boot_lock);
  859. + raw_spin_lock(&boot_lock);
  860. /*
  861. * The secondary processor is waiting to be released from
  862. @@ -107,7 +107,7 @@
  863. * now the secondary core is starting up let it run its
  864. * calibrations, then wait for it to finish
  865. */
  866. - spin_unlock(&boot_lock);
  867. + raw_spin_unlock(&boot_lock);
  868. return pen_release != -1 ? -ENOSYS : 0;
  869. }
  870. diff -Nur linux-4.1.39.orig/arch/arm/mach-qcom/platsmp.c linux-4.1.39/arch/arm/mach-qcom/platsmp.c
  871. --- linux-4.1.39.orig/arch/arm/mach-qcom/platsmp.c 2017-03-13 21:04:36.000000000 +0100
  872. +++ linux-4.1.39/arch/arm/mach-qcom/platsmp.c 2017-04-18 17:56:30.553394804 +0200
  873. @@ -46,7 +46,7 @@
  874. extern void secondary_startup_arm(void);
  875. -static DEFINE_SPINLOCK(boot_lock);
  876. +static DEFINE_RAW_SPINLOCK(boot_lock);
  877. #ifdef CONFIG_HOTPLUG_CPU
  878. static void __ref qcom_cpu_die(unsigned int cpu)
  879. @@ -60,8 +60,8 @@
  880. /*
  881. * Synchronise with the boot thread.
  882. */
  883. - spin_lock(&boot_lock);
  884. - spin_unlock(&boot_lock);
  885. + raw_spin_lock(&boot_lock);
  886. + raw_spin_unlock(&boot_lock);
  887. }
  888. static int scss_release_secondary(unsigned int cpu)
  889. @@ -284,7 +284,7 @@
  890. * set synchronisation state between this boot processor
  891. * and the secondary one
  892. */
  893. - spin_lock(&boot_lock);
  894. + raw_spin_lock(&boot_lock);
  895. /*
  896. * Send the secondary CPU a soft interrupt, thereby causing
  897. @@ -297,7 +297,7 @@
  898. * now the secondary core is starting up let it run its
  899. * calibrations, then wait for it to finish
  900. */
  901. - spin_unlock(&boot_lock);
  902. + raw_spin_unlock(&boot_lock);
  903. return ret;
  904. }
  905. diff -Nur linux-4.1.39.orig/arch/arm/mach-spear/platsmp.c linux-4.1.39/arch/arm/mach-spear/platsmp.c
  906. --- linux-4.1.39.orig/arch/arm/mach-spear/platsmp.c 2017-03-13 21:04:36.000000000 +0100
  907. +++ linux-4.1.39/arch/arm/mach-spear/platsmp.c 2017-04-18 17:56:30.553394804 +0200
  908. @@ -32,7 +32,7 @@
  909. sync_cache_w(&pen_release);
  910. }
  911. -static DEFINE_SPINLOCK(boot_lock);
  912. +static DEFINE_RAW_SPINLOCK(boot_lock);
  913. static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
  914. @@ -47,8 +47,8 @@
  915. /*
  916. * Synchronise with the boot thread.
  917. */
  918. - spin_lock(&boot_lock);
  919. - spin_unlock(&boot_lock);
  920. + raw_spin_lock(&boot_lock);
  921. + raw_spin_unlock(&boot_lock);
  922. }
  923. static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
  924. @@ -59,7 +59,7 @@
  925. * set synchronisation state between this boot processor
  926. * and the secondary one
  927. */
  928. - spin_lock(&boot_lock);
  929. + raw_spin_lock(&boot_lock);
  930. /*
  931. * The secondary processor is waiting to be released from
  932. @@ -84,7 +84,7 @@
  933. * now the secondary core is starting up let it run its
  934. * calibrations, then wait for it to finish
  935. */
  936. - spin_unlock(&boot_lock);
  937. + raw_spin_unlock(&boot_lock);
  938. return pen_release != -1 ? -ENOSYS : 0;
  939. }
  940. diff -Nur linux-4.1.39.orig/arch/arm/mach-sti/platsmp.c linux-4.1.39/arch/arm/mach-sti/platsmp.c
  941. --- linux-4.1.39.orig/arch/arm/mach-sti/platsmp.c 2017-03-13 21:04:36.000000000 +0100
  942. +++ linux-4.1.39/arch/arm/mach-sti/platsmp.c 2017-04-18 17:56:30.553394804 +0200
  943. @@ -34,7 +34,7 @@
  944. sync_cache_w(&pen_release);
  945. }
  946. -static DEFINE_SPINLOCK(boot_lock);
  947. +static DEFINE_RAW_SPINLOCK(boot_lock);
  948. static void sti_secondary_init(unsigned int cpu)
  949. {
  950. @@ -49,8 +49,8 @@
  951. /*
  952. * Synchronise with the boot thread.
  953. */
  954. - spin_lock(&boot_lock);
  955. - spin_unlock(&boot_lock);
  956. + raw_spin_lock(&boot_lock);
  957. + raw_spin_unlock(&boot_lock);
  958. }
  959. static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
  960. @@ -61,7 +61,7 @@
  961. * set synchronisation state between this boot processor
  962. * and the secondary one
  963. */
  964. - spin_lock(&boot_lock);
  965. + raw_spin_lock(&boot_lock);
  966. /*
  967. * The secondary processor is waiting to be released from
  968. @@ -92,7 +92,7 @@
  969. * now the secondary core is starting up let it run its
  970. * calibrations, then wait for it to finish
  971. */
  972. - spin_unlock(&boot_lock);
  973. + raw_spin_unlock(&boot_lock);
  974. return pen_release != -1 ? -ENOSYS : 0;
  975. }
  976. diff -Nur linux-4.1.39.orig/arch/arm/mach-ux500/platsmp.c linux-4.1.39/arch/arm/mach-ux500/platsmp.c
  977. --- linux-4.1.39.orig/arch/arm/mach-ux500/platsmp.c 2017-03-13 21:04:36.000000000 +0100
  978. +++ linux-4.1.39/arch/arm/mach-ux500/platsmp.c 2017-04-18 17:56:30.553394804 +0200
  979. @@ -51,7 +51,7 @@
  980. return NULL;
  981. }
  982. -static DEFINE_SPINLOCK(boot_lock);
  983. +static DEFINE_RAW_SPINLOCK(boot_lock);
  984. static void ux500_secondary_init(unsigned int cpu)
  985. {
  986. @@ -64,8 +64,8 @@
  987. /*
  988. * Synchronise with the boot thread.
  989. */
  990. - spin_lock(&boot_lock);
  991. - spin_unlock(&boot_lock);
  992. + raw_spin_lock(&boot_lock);
  993. + raw_spin_unlock(&boot_lock);
  994. }
  995. static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
  996. @@ -76,7 +76,7 @@
  997. * set synchronisation state between this boot processor
  998. * and the secondary one
  999. */
  1000. - spin_lock(&boot_lock);
  1001. + raw_spin_lock(&boot_lock);
  1002. /*
  1003. * The secondary processor is waiting to be released from
  1004. @@ -97,7 +97,7 @@
  1005. * now the secondary core is starting up let it run its
  1006. * calibrations, then wait for it to finish
  1007. */
  1008. - spin_unlock(&boot_lock);
  1009. + raw_spin_unlock(&boot_lock);
  1010. return pen_release != -1 ? -ENOSYS : 0;
  1011. }
  1012. diff -Nur linux-4.1.39.orig/arch/arm/mm/fault.c linux-4.1.39/arch/arm/mm/fault.c
  1013. --- linux-4.1.39.orig/arch/arm/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1014. +++ linux-4.1.39/arch/arm/mm/fault.c 2017-04-18 17:56:30.553394804 +0200
  1015. @@ -276,7 +276,7 @@
  1016. * If we're in an interrupt or have no user
  1017. * context, we must not take the fault..
  1018. */
  1019. - if (in_atomic() || !mm)
  1020. + if (faulthandler_disabled() || !mm)
  1021. goto no_context;
  1022. if (user_mode(regs))
  1023. @@ -430,6 +430,9 @@
  1024. if (addr < TASK_SIZE)
  1025. return do_page_fault(addr, fsr, regs);
  1026. + if (interrupts_enabled(regs))
  1027. + local_irq_enable();
  1028. +
  1029. if (user_mode(regs))
  1030. goto bad_area;
  1031. @@ -497,6 +500,9 @@
  1032. static int
  1033. do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  1034. {
  1035. + if (interrupts_enabled(regs))
  1036. + local_irq_enable();
  1037. +
  1038. do_bad_area(addr, fsr, regs);
  1039. return 0;
  1040. }
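The in_atomic() to faulthandler_disabled() switch in this fault handler (and in the other architectures further down) follows from pagefault_disable() no longer implying preempt_disable() in this series: the fault handler must consult an explicit per-task pagefault-disable state instead of inferring it from the preempt count. A minimal usage sketch follows, assuming only the standard uaccess helpers; the function name peek_user_word is invented for illustration.

/* Illustrative sketch: reading a user word from a context that must not
 * sleep.  With pagefaults disabled, a missing page makes the copy fail
 * with a non-zero return instead of faulting the page in, and
 * do_page_fault() sees faulthandler_disabled() and goes straight to the
 * exception-fixup path.
 */
#include <linux/types.h>
#include <linux/uaccess.h>

static int peek_user_word(const u32 __user *uaddr, u32 *val)
{
	unsigned long left;

	pagefault_disable();
	left = __copy_from_user_inatomic(val, uaddr, sizeof(*val));
	pagefault_enable();

	return left ? -EFAULT : 0;
}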
  1041. diff -Nur linux-4.1.39.orig/arch/arm/mm/highmem.c linux-4.1.39/arch/arm/mm/highmem.c
  1042. --- linux-4.1.39.orig/arch/arm/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  1043. +++ linux-4.1.39/arch/arm/mm/highmem.c 2017-04-18 17:56:30.553394804 +0200
  1044. @@ -54,11 +54,13 @@
  1045. void *kmap_atomic(struct page *page)
  1046. {
  1047. + pte_t pte = mk_pte(page, kmap_prot);
  1048. unsigned int idx;
  1049. unsigned long vaddr;
  1050. void *kmap;
  1051. int type;
  1052. + preempt_disable_nort();
  1053. pagefault_disable();
  1054. if (!PageHighMem(page))
  1055. return page_address(page);
  1056. @@ -92,7 +94,10 @@
  1057. * in place, so the contained TLB flush ensures the TLB is updated
  1058. * with the new mapping.
  1059. */
  1060. - set_fixmap_pte(idx, mk_pte(page, kmap_prot));
  1061. +#ifdef CONFIG_PREEMPT_RT_FULL
  1062. + current->kmap_pte[type] = pte;
  1063. +#endif
  1064. + set_fixmap_pte(idx, pte);
  1065. return (void *)vaddr;
  1066. }
  1067. @@ -109,27 +114,33 @@
  1068. if (cache_is_vivt())
  1069. __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
  1070. +#ifdef CONFIG_PREEMPT_RT_FULL
  1071. + current->kmap_pte[type] = __pte(0);
  1072. +#endif
  1073. #ifdef CONFIG_DEBUG_HIGHMEM
  1074. BUG_ON(vaddr != __fix_to_virt(idx));
  1075. - set_fixmap_pte(idx, __pte(0));
  1076. #else
  1077. (void) idx; /* to kill a warning */
  1078. #endif
  1079. + set_fixmap_pte(idx, __pte(0));
  1080. kmap_atomic_idx_pop();
  1081. } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
  1082. /* this address was obtained through kmap_high_get() */
  1083. kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
  1084. }
  1085. pagefault_enable();
  1086. + preempt_enable_nort();
  1087. }
  1088. EXPORT_SYMBOL(__kunmap_atomic);
  1089. void *kmap_atomic_pfn(unsigned long pfn)
  1090. {
  1091. + pte_t pte = pfn_pte(pfn, kmap_prot);
  1092. unsigned long vaddr;
  1093. int idx, type;
  1094. struct page *page = pfn_to_page(pfn);
  1095. + preempt_disable_nort();
  1096. pagefault_disable();
  1097. if (!PageHighMem(page))
  1098. return page_address(page);
  1099. @@ -140,7 +151,10 @@
  1100. #ifdef CONFIG_DEBUG_HIGHMEM
  1101. BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
  1102. #endif
  1103. - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
  1104. +#ifdef CONFIG_PREEMPT_RT_FULL
  1105. + current->kmap_pte[type] = pte;
  1106. +#endif
  1107. + set_fixmap_pte(idx, pte);
  1108. return (void *)vaddr;
  1109. }
  1110. @@ -154,3 +168,28 @@
  1111. return pte_page(get_fixmap_pte(vaddr));
  1112. }
  1113. +
  1114. +#if defined CONFIG_PREEMPT_RT_FULL
  1115. +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  1116. +{
  1117. + int i;
  1118. +
  1119. + /*
  1120. + * Clear @prev's kmap_atomic mappings
  1121. + */
  1122. + for (i = 0; i < prev_p->kmap_idx; i++) {
  1123. + int idx = i + KM_TYPE_NR * smp_processor_id();
  1124. +
  1125. + set_fixmap_pte(idx, __pte(0));
  1126. + }
  1127. + /*
  1128. + * Restore @next_p's kmap_atomic mappings
  1129. + */
  1130. + for (i = 0; i < next_p->kmap_idx; i++) {
  1131. + int idx = i + KM_TYPE_NR * smp_processor_id();
  1132. +
  1133. + if (!pte_none(next_p->kmap_pte[i]))
  1134. + set_fixmap_pte(idx, next_p->kmap_pte[i]);
  1135. + }
  1136. +}
  1137. +#endif
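The ARM highmem hunk above relies on two pieces that live elsewhere in this series: the *_nort helpers, which make kmap_atomic() stop disabling preemption on RT, and the per-task kmap_pte[]/kmap_idx state that switch_kmaps() replays when a task is switched back in. The sketch below shows the rough shape of the *_nort helpers as used here; it is a reference sketch based on the RT preempt.h changes, not code from this hunk.

/* Reference sketch of the *_nort helpers used by kmap_atomic() above:
 * on PREEMPT_RT_FULL they are no-ops (the mapping is carried in
 * current->kmap_pte[] across preemption instead); on !RT they fall back
 * to ordinary preempt_disable()/preempt_enable().
 */
#ifdef CONFIG_PREEMPT_RT_FULL
# define preempt_disable_nort()		barrier()
# define preempt_enable_nort()		barrier()
#else
# define preempt_disable_nort()		preempt_disable()
# define preempt_enable_nort()		preempt_enable()
#endif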
  1138. diff -Nur linux-4.1.39.orig/arch/arm/plat-versatile/platsmp.c linux-4.1.39/arch/arm/plat-versatile/platsmp.c
  1139. --- linux-4.1.39.orig/arch/arm/plat-versatile/platsmp.c 2017-03-13 21:04:36.000000000 +0100
  1140. +++ linux-4.1.39/arch/arm/plat-versatile/platsmp.c 2017-04-18 17:56:30.553394804 +0200
  1141. @@ -30,7 +30,7 @@
  1142. sync_cache_w(&pen_release);
  1143. }
  1144. -static DEFINE_SPINLOCK(boot_lock);
  1145. +static DEFINE_RAW_SPINLOCK(boot_lock);
  1146. void versatile_secondary_init(unsigned int cpu)
  1147. {
  1148. @@ -43,8 +43,8 @@
  1149. /*
  1150. * Synchronise with the boot thread.
  1151. */
  1152. - spin_lock(&boot_lock);
  1153. - spin_unlock(&boot_lock);
  1154. + raw_spin_lock(&boot_lock);
  1155. + raw_spin_unlock(&boot_lock);
  1156. }
  1157. int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
  1158. @@ -55,7 +55,7 @@
  1159. * Set synchronisation state between this boot processor
  1160. * and the secondary one
  1161. */
  1162. - spin_lock(&boot_lock);
  1163. + raw_spin_lock(&boot_lock);
  1164. /*
  1165. * This is really belt and braces; we hold unintended secondary
  1166. @@ -85,7 +85,7 @@
  1167. * now the secondary core is starting up let it run its
  1168. * calibrations, then wait for it to finish
  1169. */
  1170. - spin_unlock(&boot_lock);
  1171. + raw_spin_unlock(&boot_lock);
  1172. return pen_release != -1 ? -ENOSYS : 0;
  1173. }
  1174. diff -Nur linux-4.1.39.orig/arch/arm64/include/asm/futex.h linux-4.1.39/arch/arm64/include/asm/futex.h
  1175. --- linux-4.1.39.orig/arch/arm64/include/asm/futex.h 2017-03-13 21:04:36.000000000 +0100
  1176. +++ linux-4.1.39/arch/arm64/include/asm/futex.h 2017-04-18 17:56:30.553394804 +0200
  1177. @@ -58,7 +58,7 @@
  1178. if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
  1179. return -EFAULT;
  1180. - pagefault_disable(); /* implies preempt_disable() */
  1181. + pagefault_disable();
  1182. switch (op) {
  1183. case FUTEX_OP_SET:
  1184. @@ -85,7 +85,7 @@
  1185. ret = -ENOSYS;
  1186. }
  1187. - pagefault_enable(); /* subsumes preempt_enable() */
  1188. + pagefault_enable();
  1189. if (!ret) {
  1190. switch (cmp) {
  1191. diff -Nur linux-4.1.39.orig/arch/arm64/include/asm/thread_info.h linux-4.1.39/arch/arm64/include/asm/thread_info.h
  1192. --- linux-4.1.39.orig/arch/arm64/include/asm/thread_info.h 2017-03-13 21:04:36.000000000 +0100
  1193. +++ linux-4.1.39/arch/arm64/include/asm/thread_info.h 2017-04-18 17:56:30.553394804 +0200
  1194. @@ -47,6 +47,7 @@
  1195. mm_segment_t addr_limit; /* address limit */
  1196. struct task_struct *task; /* main task structure */
  1197. int preempt_count; /* 0 => preemptable, <0 => bug */
  1198. + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
  1199. int cpu; /* cpu */
  1200. };
  1201. @@ -101,6 +102,7 @@
  1202. #define TIF_NEED_RESCHED 1
  1203. #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
  1204. #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
  1205. +#define TIF_NEED_RESCHED_LAZY 4
  1206. #define TIF_NOHZ 7
  1207. #define TIF_SYSCALL_TRACE 8
  1208. #define TIF_SYSCALL_AUDIT 9
  1209. @@ -117,6 +119,7 @@
  1210. #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
  1211. #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
  1212. #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
  1213. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  1214. #define _TIF_NOHZ (1 << TIF_NOHZ)
  1215. #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
  1216. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  1217. diff -Nur linux-4.1.39.orig/arch/arm64/Kconfig linux-4.1.39/arch/arm64/Kconfig
  1218. --- linux-4.1.39.orig/arch/arm64/Kconfig 2017-03-13 21:04:36.000000000 +0100
  1219. +++ linux-4.1.39/arch/arm64/Kconfig 2017-04-18 17:56:30.553394804 +0200
  1220. @@ -69,8 +69,10 @@
  1221. select HAVE_PERF_REGS
  1222. select HAVE_PERF_USER_STACK_DUMP
  1223. select HAVE_RCU_TABLE_FREE
  1224. + select HAVE_PREEMPT_LAZY
  1225. select HAVE_SYSCALL_TRACEPOINTS
  1226. select IRQ_DOMAIN
  1227. + select IRQ_FORCED_THREADING
  1228. select MODULES_USE_ELF_RELA
  1229. select NO_BOOTMEM
  1230. select OF
  1231. @@ -599,7 +601,7 @@
  1232. config XEN
  1233. bool "Xen guest support on ARM64"
  1234. - depends on ARM64 && OF
  1235. + depends on ARM64 && OF && !PREEMPT_RT_FULL
  1236. select SWIOTLB_XEN
  1237. help
  1238. Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
  1239. diff -Nur linux-4.1.39.orig/arch/arm64/kernel/asm-offsets.c linux-4.1.39/arch/arm64/kernel/asm-offsets.c
  1240. --- linux-4.1.39.orig/arch/arm64/kernel/asm-offsets.c 2017-03-13 21:04:36.000000000 +0100
  1241. +++ linux-4.1.39/arch/arm64/kernel/asm-offsets.c 2017-04-18 17:56:30.553394804 +0200
  1242. @@ -35,6 +35,7 @@
  1243. BLANK();
  1244. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  1245. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  1246. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  1247. DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
  1248. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  1249. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  1250. diff -Nur linux-4.1.39.orig/arch/arm64/kernel/entry.S linux-4.1.39/arch/arm64/kernel/entry.S
  1251. --- linux-4.1.39.orig/arch/arm64/kernel/entry.S 2017-03-13 21:04:36.000000000 +0100
  1252. +++ linux-4.1.39/arch/arm64/kernel/entry.S 2017-04-18 17:56:30.553394804 +0200
  1253. @@ -380,11 +380,16 @@
  1254. #ifdef CONFIG_PREEMPT
  1255. get_thread_info tsk
  1256. ldr w24, [tsk, #TI_PREEMPT] // get preempt count
  1257. - cbnz w24, 1f // preempt count != 0
  1258. + cbnz w24, 2f // preempt count != 0
  1259. ldr x0, [tsk, #TI_FLAGS] // get flags
  1260. - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1261. - bl el1_preempt
  1262. + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
  1263. +
  1264. + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
  1265. + cbnz w24, 2f // preempt lazy count != 0
  1266. + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
  1267. 1:
  1268. + bl el1_preempt
  1269. +2:
  1270. #endif
  1271. #ifdef CONFIG_TRACE_IRQFLAGS
  1272. bl trace_hardirqs_on
  1273. @@ -398,6 +403,7 @@
  1274. 1: bl preempt_schedule_irq // irq en/disable is done inside
  1275. ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
  1276. tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
  1277. + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
  1278. ret x24
  1279. #endif
  1280. @@ -635,6 +641,7 @@
  1281. str x0, [sp, #S_X0] // returned x0
  1282. work_pending:
  1283. tbnz x1, #TIF_NEED_RESCHED, work_resched
  1284. + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
  1285. /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
  1286. ldr x2, [sp, #S_PSTATE]
  1287. mov x0, sp // 'regs'
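The arm64 entry.S hunk above implements the lazy-preemption check that this series adds across architectures. Expressed in C, the IRQ-return path now decides roughly as sketched below; the helper name is invented and the sketch only mirrors the logic the assembly encodes, using the thread_info fields and TIF flags added earlier in this patch.

/* Sketch of the decision encoded by the assembly above: preempt on IRQ
 * return only if the ordinary preempt count is zero and either a real
 * reschedule is pending, or a lazy reschedule is pending and the lazy
 * count is also zero.
 */
#include <linux/types.h>
#include <linux/thread_info.h>

static inline bool should_preempt_irq_return(struct thread_info *ti)
{
	if (ti->preempt_count != 0)
		return false;
	if (ti->flags & _TIF_NEED_RESCHED)
		return true;
	if (ti->preempt_lazy_count != 0)
		return false;
	return ti->flags & _TIF_NEED_RESCHED_LAZY;
}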
  1288. diff -Nur linux-4.1.39.orig/arch/arm64/kernel/insn.c linux-4.1.39/arch/arm64/kernel/insn.c
  1289. --- linux-4.1.39.orig/arch/arm64/kernel/insn.c 2017-03-13 21:04:36.000000000 +0100
  1290. +++ linux-4.1.39/arch/arm64/kernel/insn.c 2017-04-18 17:56:30.553394804 +0200
  1291. @@ -77,7 +77,7 @@
  1292. }
  1293. }
  1294. -static DEFINE_SPINLOCK(patch_lock);
  1295. +static DEFINE_RAW_SPINLOCK(patch_lock);
  1296. static void __kprobes *patch_map(void *addr, int fixmap)
  1297. {
  1298. @@ -124,13 +124,13 @@
  1299. unsigned long flags = 0;
  1300. int ret;
  1301. - spin_lock_irqsave(&patch_lock, flags);
  1302. + raw_spin_lock_irqsave(&patch_lock, flags);
  1303. waddr = patch_map(addr, FIX_TEXT_POKE0);
  1304. ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
  1305. patch_unmap(FIX_TEXT_POKE0);
  1306. - spin_unlock_irqrestore(&patch_lock, flags);
  1307. + raw_spin_unlock_irqrestore(&patch_lock, flags);
  1308. return ret;
  1309. }
  1310. diff -Nur linux-4.1.39.orig/arch/arm64/kernel/perf_event.c linux-4.1.39/arch/arm64/kernel/perf_event.c
  1311. --- linux-4.1.39.orig/arch/arm64/kernel/perf_event.c 2017-03-13 21:04:36.000000000 +0100
  1312. +++ linux-4.1.39/arch/arm64/kernel/perf_event.c 2017-04-18 17:56:30.553394804 +0200
  1313. @@ -488,7 +488,7 @@
  1314. }
  1315. err = request_irq(irq, armpmu->handle_irq,
  1316. - IRQF_NOBALANCING,
  1317. + IRQF_NOBALANCING | IRQF_NO_THREAD,
  1318. "arm-pmu", armpmu);
  1319. if (err) {
  1320. pr_err("unable to request IRQ%d for ARM PMU counters\n",
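IRQF_NO_THREAD matters here because the series also selects IRQ_FORCED_THREADING for arm64 (see the Kconfig hunk above): with forced threading, a PMU overflow interrupt handled in a thread would be useless for profiling, so it has to stay in hard-IRQ context. A hedged example of requesting such an interrupt follows; the IRQ number, handler and device cookie are placeholders.

/* Illustrative request of an interrupt that must never be force-threaded,
 * e.g. a per-CPU performance counter overflow.
 */
#include <linux/interrupt.h>

static irqreturn_t pmu_overflow_handler(int irq, void *dev)
{
	/* read and clear the overflow status in hard-IRQ context */
	return IRQ_HANDLED;
}

static int example_request_pmu_irq(int irq, void *pmu)
{
	return request_irq(irq, pmu_overflow_handler,
			   IRQF_NOBALANCING | IRQF_NO_THREAD,
			   "arm-pmu", pmu);
}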
  1321. diff -Nur linux-4.1.39.orig/arch/arm64/mm/fault.c linux-4.1.39/arch/arm64/mm/fault.c
  1322. --- linux-4.1.39.orig/arch/arm64/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1323. +++ linux-4.1.39/arch/arm64/mm/fault.c 2017-04-18 17:56:30.553394804 +0200
  1324. @@ -211,7 +211,7 @@
  1325. * If we're in an interrupt or have no user context, we must not take
  1326. * the fault.
  1327. */
  1328. - if (in_atomic() || !mm)
  1329. + if (faulthandler_disabled() || !mm)
  1330. goto no_context;
  1331. if (user_mode(regs))
  1332. diff -Nur linux-4.1.39.orig/arch/avr32/include/asm/uaccess.h linux-4.1.39/arch/avr32/include/asm/uaccess.h
  1333. --- linux-4.1.39.orig/arch/avr32/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  1334. +++ linux-4.1.39/arch/avr32/include/asm/uaccess.h 2017-04-18 17:56:30.553394804 +0200
  1335. @@ -106,7 +106,8 @@
  1336. * @x: Value to copy to user space.
  1337. * @ptr: Destination address, in user space.
  1338. *
  1339. - * Context: User context only. This function may sleep.
  1340. + * Context: User context only. This function may sleep if pagefaults are
  1341. + * enabled.
  1342. *
  1343. * This macro copies a single simple value from kernel space to user
  1344. * space. It supports simple types like char and int, but not larger
  1345. @@ -125,7 +126,8 @@
  1346. * @x: Variable to store result.
  1347. * @ptr: Source address, in user space.
  1348. *
  1349. - * Context: User context only. This function may sleep.
  1350. + * Context: User context only. This function may sleep if pagefaults are
  1351. + * enabled.
  1352. *
  1353. * This macro copies a single simple variable from user space to kernel
  1354. * space. It supports simple types like char and int, but not larger
  1355. @@ -145,7 +147,8 @@
  1356. * @x: Value to copy to user space.
  1357. * @ptr: Destination address, in user space.
  1358. *
  1359. - * Context: User context only. This function may sleep.
  1360. + * Context: User context only. This function may sleep if pagefaults are
  1361. + * enabled.
  1362. *
  1363. * This macro copies a single simple value from kernel space to user
  1364. * space. It supports simple types like char and int, but not larger
  1365. @@ -167,7 +170,8 @@
  1366. * @x: Variable to store result.
  1367. * @ptr: Source address, in user space.
  1368. *
  1369. - * Context: User context only. This function may sleep.
  1370. + * Context: User context only. This function may sleep if pagefaults are
  1371. + * enabled.
  1372. *
  1373. * This macro copies a single simple variable from user space to kernel
  1374. * space. It supports simple types like char and int, but not larger
  1375. diff -Nur linux-4.1.39.orig/arch/avr32/mm/fault.c linux-4.1.39/arch/avr32/mm/fault.c
  1376. --- linux-4.1.39.orig/arch/avr32/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1377. +++ linux-4.1.39/arch/avr32/mm/fault.c 2017-04-18 17:56:30.553394804 +0200
  1378. @@ -14,11 +14,11 @@
  1379. #include <linux/pagemap.h>
  1380. #include <linux/kdebug.h>
  1381. #include <linux/kprobes.h>
  1382. +#include <linux/uaccess.h>
  1383. #include <asm/mmu_context.h>
  1384. #include <asm/sysreg.h>
  1385. #include <asm/tlb.h>
  1386. -#include <asm/uaccess.h>
  1387. #ifdef CONFIG_KPROBES
  1388. static inline int notify_page_fault(struct pt_regs *regs, int trap)
  1389. @@ -81,7 +81,7 @@
  1390. * If we're in an interrupt or have no user context, we must
  1391. * not take the fault...
  1392. */
  1393. - if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
  1394. + if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM))
  1395. goto no_context;
  1396. local_irq_enable();
  1397. diff -Nur linux-4.1.39.orig/arch/cris/mm/fault.c linux-4.1.39/arch/cris/mm/fault.c
  1398. --- linux-4.1.39.orig/arch/cris/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1399. +++ linux-4.1.39/arch/cris/mm/fault.c 2017-04-18 17:56:30.553394804 +0200
  1400. @@ -8,7 +8,7 @@
  1401. #include <linux/interrupt.h>
  1402. #include <linux/module.h>
  1403. #include <linux/wait.h>
  1404. -#include <asm/uaccess.h>
  1405. +#include <linux/uaccess.h>
  1406. #include <arch/system.h>
  1407. extern int find_fixup_code(struct pt_regs *);
  1408. @@ -109,11 +109,11 @@
  1409. info.si_code = SEGV_MAPERR;
  1410. /*
  1411. - * If we're in an interrupt or "atomic" operation or have no
  1412. + * If we're in an interrupt, have pagefaults disabled or have no
  1413. * user context, we must not take the fault.
  1414. */
  1415. - if (in_atomic() || !mm)
  1416. + if (faulthandler_disabled() || !mm)
  1417. goto no_context;
  1418. if (user_mode(regs))
  1419. diff -Nur linux-4.1.39.orig/arch/frv/mm/fault.c linux-4.1.39/arch/frv/mm/fault.c
  1420. --- linux-4.1.39.orig/arch/frv/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1421. +++ linux-4.1.39/arch/frv/mm/fault.c 2017-04-18 17:56:30.553394804 +0200
  1422. @@ -19,9 +19,9 @@
  1423. #include <linux/kernel.h>
  1424. #include <linux/ptrace.h>
  1425. #include <linux/hardirq.h>
  1426. +#include <linux/uaccess.h>
  1427. #include <asm/pgtable.h>
  1428. -#include <asm/uaccess.h>
  1429. #include <asm/gdb-stub.h>
  1430. /*****************************************************************************/
  1431. @@ -78,7 +78,7 @@
  1432. * If we're in an interrupt or have no user
  1433. * context, we must not take the fault..
  1434. */
  1435. - if (in_atomic() || !mm)
  1436. + if (faulthandler_disabled() || !mm)
  1437. goto no_context;
  1438. if (user_mode(__frame))
  1439. diff -Nur linux-4.1.39.orig/arch/frv/mm/highmem.c linux-4.1.39/arch/frv/mm/highmem.c
  1440. --- linux-4.1.39.orig/arch/frv/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  1441. +++ linux-4.1.39/arch/frv/mm/highmem.c 2017-04-18 17:56:30.553394804 +0200
  1442. @@ -42,6 +42,7 @@
  1443. unsigned long paddr;
  1444. int type;
  1445. + preempt_disable();
  1446. pagefault_disable();
  1447. type = kmap_atomic_idx_push();
  1448. paddr = page_to_phys(page);
  1449. @@ -85,5 +86,6 @@
  1450. }
  1451. kmap_atomic_idx_pop();
  1452. pagefault_enable();
  1453. + preempt_enable();
  1454. }
  1455. EXPORT_SYMBOL(__kunmap_atomic);
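From here on, the highmem changes on the remaining architectures (frv above, then metag, microblaze, mips, mn10300) all add an explicit preempt_disable()/preempt_enable() pair around the pagefault_disable()/pagefault_enable() calls in kmap_atomic() and __kunmap_atomic(), because pagefault_disable() alone no longer pins the task to a CPU. The resulting shape is sketched below with invented function names; the body of the fixmap handling is elided, so the return value is only a placeholder.

/* Illustrative shape of a kmap_atomic()/__kunmap_atomic() pair after this
 * change on architectures without RT-specific kmap handling: preemption
 * is disabled explicitly, since pagefault_disable() no longer implies it,
 * and re-enabled in the reverse order on unmap.
 */
#include <linux/mm.h>
#include <linux/preempt.h>
#include <linux/uaccess.h>

void *example_kmap_atomic(struct page *page)
{
	preempt_disable();
	pagefault_disable();
	if (!PageHighMem(page))
		return page_address(page);
	/* ... pick a fixmap slot and install the pte here ... */
	return NULL;	/* placeholder for the fixmap address */
}

void example_kunmap_atomic(void *kvaddr)
{
	/* ... tear down the fixmap slot if one was used ... */
	pagefault_enable();
	preempt_enable();
}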
  1456. diff -Nur linux-4.1.39.orig/arch/hexagon/include/asm/uaccess.h linux-4.1.39/arch/hexagon/include/asm/uaccess.h
  1457. --- linux-4.1.39.orig/arch/hexagon/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  1458. +++ linux-4.1.39/arch/hexagon/include/asm/uaccess.h 2017-04-18 17:56:30.553394804 +0200
  1459. @@ -36,7 +36,8 @@
  1460. * @addr: User space pointer to start of block to check
  1461. * @size: Size of block to check
  1462. *
  1463. - * Context: User context only. This function may sleep.
  1464. + * Context: User context only. This function may sleep if pagefaults are
  1465. + * enabled.
  1466. *
  1467. * Checks if a pointer to a block of memory in user space is valid.
  1468. *
  1469. diff -Nur linux-4.1.39.orig/arch/ia64/mm/fault.c linux-4.1.39/arch/ia64/mm/fault.c
  1470. --- linux-4.1.39.orig/arch/ia64/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1471. +++ linux-4.1.39/arch/ia64/mm/fault.c 2017-04-18 17:56:30.553394804 +0200
  1472. @@ -11,10 +11,10 @@
  1473. #include <linux/kprobes.h>
  1474. #include <linux/kdebug.h>
  1475. #include <linux/prefetch.h>
  1476. +#include <linux/uaccess.h>
  1477. #include <asm/pgtable.h>
  1478. #include <asm/processor.h>
  1479. -#include <asm/uaccess.h>
  1480. extern int die(char *, struct pt_regs *, long);
  1481. @@ -96,7 +96,7 @@
  1482. /*
  1483. * If we're in an interrupt or have no user context, we must not take the fault..
  1484. */
  1485. - if (in_atomic() || !mm)
  1486. + if (faulthandler_disabled() || !mm)
  1487. goto no_context;
  1488. #ifdef CONFIG_VIRTUAL_MEM_MAP
  1489. diff -Nur linux-4.1.39.orig/arch/Kconfig linux-4.1.39/arch/Kconfig
  1490. --- linux-4.1.39.orig/arch/Kconfig 2017-03-13 21:04:36.000000000 +0100
  1491. +++ linux-4.1.39/arch/Kconfig 2017-04-18 17:56:30.549394649 +0200
  1492. @@ -6,6 +6,7 @@
  1493. tristate "OProfile system profiling"
  1494. depends on PROFILING
  1495. depends on HAVE_OPROFILE
  1496. + depends on !PREEMPT_RT_FULL
  1497. select RING_BUFFER
  1498. select RING_BUFFER_ALLOW_SWAP
  1499. help
  1500. @@ -49,6 +50,7 @@
  1501. config JUMP_LABEL
  1502. bool "Optimize very unlikely/likely branches"
  1503. depends on HAVE_ARCH_JUMP_LABEL
  1504. + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST)
  1505. help
  1506. This option enables a transparent branch optimization that
  1507. makes certain almost-always-true or almost-always-false branch
  1508. diff -Nur linux-4.1.39.orig/arch/m32r/include/asm/uaccess.h linux-4.1.39/arch/m32r/include/asm/uaccess.h
  1509. --- linux-4.1.39.orig/arch/m32r/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  1510. +++ linux-4.1.39/arch/m32r/include/asm/uaccess.h 2017-04-18 17:56:30.553394804 +0200
  1511. @@ -91,7 +91,8 @@
  1512. * @addr: User space pointer to start of block to check
  1513. * @size: Size of block to check
  1514. *
  1515. - * Context: User context only. This function may sleep.
  1516. + * Context: User context only. This function may sleep if pagefaults are
  1517. + * enabled.
  1518. *
  1519. * Checks if a pointer to a block of memory in user space is valid.
  1520. *
  1521. @@ -155,7 +156,8 @@
  1522. * @x: Variable to store result.
  1523. * @ptr: Source address, in user space.
  1524. *
  1525. - * Context: User context only. This function may sleep.
  1526. + * Context: User context only. This function may sleep if pagefaults are
  1527. + * enabled.
  1528. *
  1529. * This macro copies a single simple variable from user space to kernel
  1530. * space. It supports simple types like char and int, but not larger
  1531. @@ -175,7 +177,8 @@
  1532. * @x: Value to copy to user space.
  1533. * @ptr: Destination address, in user space.
  1534. *
  1535. - * Context: User context only. This function may sleep.
  1536. + * Context: User context only. This function may sleep if pagefaults are
  1537. + * enabled.
  1538. *
  1539. * This macro copies a single simple value from kernel space to user
  1540. * space. It supports simple types like char and int, but not larger
  1541. @@ -194,7 +197,8 @@
  1542. * @x: Variable to store result.
  1543. * @ptr: Source address, in user space.
  1544. *
  1545. - * Context: User context only. This function may sleep.
  1546. + * Context: User context only. This function may sleep if pagefaults are
  1547. + * enabled.
  1548. *
  1549. * This macro copies a single simple variable from user space to kernel
  1550. * space. It supports simple types like char and int, but not larger
  1551. @@ -274,7 +278,8 @@
  1552. * @x: Value to copy to user space.
  1553. * @ptr: Destination address, in user space.
  1554. *
  1555. - * Context: User context only. This function may sleep.
  1556. + * Context: User context only. This function may sleep if pagefaults are
  1557. + * enabled.
  1558. *
  1559. * This macro copies a single simple value from kernel space to user
  1560. * space. It supports simple types like char and int, but not larger
  1561. @@ -568,7 +573,8 @@
  1562. * @from: Source address, in kernel space.
  1563. * @n: Number of bytes to copy.
  1564. *
  1565. - * Context: User context only. This function may sleep.
  1566. + * Context: User context only. This function may sleep if pagefaults are
  1567. + * enabled.
  1568. *
  1569. * Copy data from kernel space to user space. Caller must check
  1570. * the specified block with access_ok() before calling this function.
  1571. @@ -588,7 +594,8 @@
  1572. * @from: Source address, in kernel space.
  1573. * @n: Number of bytes to copy.
  1574. *
  1575. - * Context: User context only. This function may sleep.
  1576. + * Context: User context only. This function may sleep if pagefaults are
  1577. + * enabled.
  1578. *
  1579. * Copy data from kernel space to user space.
  1580. *
  1581. @@ -606,7 +613,8 @@
  1582. * @from: Source address, in user space.
  1583. * @n: Number of bytes to copy.
  1584. *
  1585. - * Context: User context only. This function may sleep.
  1586. + * Context: User context only. This function may sleep if pagefaults are
  1587. + * enabled.
  1588. *
  1589. * Copy data from user space to kernel space. Caller must check
  1590. * the specified block with access_ok() before calling this function.
  1591. @@ -626,7 +634,8 @@
  1592. * @from: Source address, in user space.
  1593. * @n: Number of bytes to copy.
  1594. *
  1595. - * Context: User context only. This function may sleep.
  1596. + * Context: User context only. This function may sleep if pagefaults are
  1597. + * enabled.
  1598. *
  1599. * Copy data from user space to kernel space.
  1600. *
  1601. @@ -677,7 +686,8 @@
  1602. * strlen_user: - Get the size of a string in user space.
  1603. * @str: The string to measure.
  1604. *
  1605. - * Context: User context only. This function may sleep.
  1606. + * Context: User context only. This function may sleep if pagefaults are
  1607. + * enabled.
  1608. *
  1609. * Get the size of a NUL-terminated string in user space.
  1610. *
  1611. diff -Nur linux-4.1.39.orig/arch/m32r/mm/fault.c linux-4.1.39/arch/m32r/mm/fault.c
  1612. --- linux-4.1.39.orig/arch/m32r/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1613. +++ linux-4.1.39/arch/m32r/mm/fault.c 2017-04-18 17:56:30.553394804 +0200
  1614. @@ -24,9 +24,9 @@
  1615. #include <linux/vt_kern.h> /* For unblank_screen() */
  1616. #include <linux/highmem.h>
  1617. #include <linux/module.h>
  1618. +#include <linux/uaccess.h>
  1619. #include <asm/m32r.h>
  1620. -#include <asm/uaccess.h>
  1621. #include <asm/hardirq.h>
  1622. #include <asm/mmu_context.h>
  1623. #include <asm/tlbflush.h>
  1624. @@ -111,10 +111,10 @@
  1625. mm = tsk->mm;
  1626. /*
  1627. - * If we're in an interrupt or have no user context or are running in an
  1628. - * atomic region then we must not take the fault..
  1629. + * If we're in an interrupt or have no user context or have pagefaults
  1630. + * disabled then we must not take the fault.
  1631. */
  1632. - if (in_atomic() || !mm)
  1633. + if (faulthandler_disabled() || !mm)
  1634. goto bad_area_nosemaphore;
  1635. if (error_code & ACE_USERMODE)
  1636. diff -Nur linux-4.1.39.orig/arch/m68k/mm/fault.c linux-4.1.39/arch/m68k/mm/fault.c
  1637. --- linux-4.1.39.orig/arch/m68k/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1638. +++ linux-4.1.39/arch/m68k/mm/fault.c 2017-04-18 17:56:30.557394959 +0200
  1639. @@ -10,10 +10,10 @@
  1640. #include <linux/ptrace.h>
  1641. #include <linux/interrupt.h>
  1642. #include <linux/module.h>
  1643. +#include <linux/uaccess.h>
  1644. #include <asm/setup.h>
  1645. #include <asm/traps.h>
  1646. -#include <asm/uaccess.h>
  1647. #include <asm/pgalloc.h>
  1648. extern void die_if_kernel(char *, struct pt_regs *, long);
  1649. @@ -81,7 +81,7 @@
  1650. * If we're in an interrupt or have no user
  1651. * context, we must not take the fault..
  1652. */
  1653. - if (in_atomic() || !mm)
  1654. + if (faulthandler_disabled() || !mm)
  1655. goto no_context;
  1656. if (user_mode(regs))
  1657. diff -Nur linux-4.1.39.orig/arch/metag/mm/fault.c linux-4.1.39/arch/metag/mm/fault.c
  1658. --- linux-4.1.39.orig/arch/metag/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1659. +++ linux-4.1.39/arch/metag/mm/fault.c 2017-04-18 17:56:30.557394959 +0200
  1660. @@ -105,7 +105,7 @@
  1661. mm = tsk->mm;
  1662. - if (in_atomic() || !mm)
  1663. + if (faulthandler_disabled() || !mm)
  1664. goto no_context;
  1665. if (user_mode(regs))
  1666. diff -Nur linux-4.1.39.orig/arch/metag/mm/highmem.c linux-4.1.39/arch/metag/mm/highmem.c
  1667. --- linux-4.1.39.orig/arch/metag/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  1668. +++ linux-4.1.39/arch/metag/mm/highmem.c 2017-04-18 17:56:30.557394959 +0200
  1669. @@ -43,7 +43,7 @@
  1670. unsigned long vaddr;
  1671. int type;
  1672. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  1673. + preempt_disable();
  1674. pagefault_disable();
  1675. if (!PageHighMem(page))
  1676. return page_address(page);
  1677. @@ -82,6 +82,7 @@
  1678. }
  1679. pagefault_enable();
  1680. + preempt_enable();
  1681. }
  1682. EXPORT_SYMBOL(__kunmap_atomic);
  1683. @@ -95,6 +96,7 @@
  1684. unsigned long vaddr;
  1685. int type;
  1686. + preempt_disable();
  1687. pagefault_disable();
  1688. type = kmap_atomic_idx_push();
  1689. diff -Nur linux-4.1.39.orig/arch/microblaze/include/asm/uaccess.h linux-4.1.39/arch/microblaze/include/asm/uaccess.h
  1690. --- linux-4.1.39.orig/arch/microblaze/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  1691. +++ linux-4.1.39/arch/microblaze/include/asm/uaccess.h 2017-04-18 17:56:30.557394959 +0200
  1692. @@ -178,7 +178,8 @@
  1693. * @x: Variable to store result.
  1694. * @ptr: Source address, in user space.
  1695. *
  1696. - * Context: User context only. This function may sleep.
  1697. + * Context: User context only. This function may sleep if pagefaults are
  1698. + * enabled.
  1699. *
  1700. * This macro copies a single simple variable from user space to kernel
  1701. * space. It supports simple types like char and int, but not larger
  1702. @@ -290,7 +291,8 @@
  1703. * @x: Value to copy to user space.
  1704. * @ptr: Destination address, in user space.
  1705. *
  1706. - * Context: User context only. This function may sleep.
  1707. + * Context: User context only. This function may sleep if pagefaults are
  1708. + * enabled.
  1709. *
  1710. * This macro copies a single simple value from kernel space to user
  1711. * space. It supports simple types like char and int, but not larger
  1712. diff -Nur linux-4.1.39.orig/arch/microblaze/mm/fault.c linux-4.1.39/arch/microblaze/mm/fault.c
  1713. --- linux-4.1.39.orig/arch/microblaze/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1714. +++ linux-4.1.39/arch/microblaze/mm/fault.c 2017-04-18 17:56:30.557394959 +0200
  1715. @@ -107,14 +107,14 @@
  1716. if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
  1717. is_write = 0;
  1718. - if (unlikely(in_atomic() || !mm)) {
  1719. + if (unlikely(faulthandler_disabled() || !mm)) {
  1720. if (kernel_mode(regs))
  1721. goto bad_area_nosemaphore;
  1722. - /* in_atomic() in user mode is really bad,
  1723. + /* faulthandler_disabled() in user mode is really bad,
  1724. as is current->mm == NULL. */
  1725. - pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n",
  1726. - mm);
  1727. + pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",
  1728. + mm);
  1729. pr_emerg("r15 = %lx MSR = %lx\n",
  1730. regs->r15, regs->msr);
  1731. die("Weird page fault", regs, SIGSEGV);
  1732. diff -Nur linux-4.1.39.orig/arch/microblaze/mm/highmem.c linux-4.1.39/arch/microblaze/mm/highmem.c
  1733. --- linux-4.1.39.orig/arch/microblaze/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  1734. +++ linux-4.1.39/arch/microblaze/mm/highmem.c 2017-04-18 17:56:30.557394959 +0200
  1735. @@ -37,7 +37,7 @@
  1736. unsigned long vaddr;
  1737. int idx, type;
  1738. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  1739. + preempt_disable();
  1740. pagefault_disable();
  1741. if (!PageHighMem(page))
  1742. return page_address(page);
  1743. @@ -63,6 +63,7 @@
  1744. if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
  1745. pagefault_enable();
  1746. + preempt_enable();
  1747. return;
  1748. }
  1749. @@ -84,5 +85,6 @@
  1750. #endif
  1751. kmap_atomic_idx_pop();
  1752. pagefault_enable();
  1753. + preempt_enable();
  1754. }
  1755. EXPORT_SYMBOL(__kunmap_atomic);
  1756. diff -Nur linux-4.1.39.orig/arch/mips/include/asm/uaccess.h linux-4.1.39/arch/mips/include/asm/uaccess.h
  1757. --- linux-4.1.39.orig/arch/mips/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  1758. +++ linux-4.1.39/arch/mips/include/asm/uaccess.h 2017-04-18 17:56:30.557394959 +0200
  1759. @@ -104,7 +104,8 @@
  1760. * @addr: User space pointer to start of block to check
  1761. * @size: Size of block to check
  1762. *
  1763. - * Context: User context only. This function may sleep.
  1764. + * Context: User context only. This function may sleep if pagefaults are
  1765. + * enabled.
  1766. *
  1767. * Checks if a pointer to a block of memory in user space is valid.
  1768. *
  1769. @@ -139,7 +140,8 @@
  1770. * @x: Value to copy to user space.
  1771. * @ptr: Destination address, in user space.
  1772. *
  1773. - * Context: User context only. This function may sleep.
  1774. + * Context: User context only. This function may sleep if pagefaults are
  1775. + * enabled.
  1776. *
  1777. * This macro copies a single simple value from kernel space to user
  1778. * space. It supports simple types like char and int, but not larger
  1779. @@ -158,7 +160,8 @@
  1780. * @x: Variable to store result.
  1781. * @ptr: Source address, in user space.
  1782. *
  1783. - * Context: User context only. This function may sleep.
  1784. + * Context: User context only. This function may sleep if pagefaults are
  1785. + * enabled.
  1786. *
  1787. * This macro copies a single simple variable from user space to kernel
  1788. * space. It supports simple types like char and int, but not larger
  1789. @@ -178,7 +181,8 @@
  1790. * @x: Value to copy to user space.
  1791. * @ptr: Destination address, in user space.
  1792. *
  1793. - * Context: User context only. This function may sleep.
  1794. + * Context: User context only. This function may sleep if pagefaults are
  1795. + * enabled.
  1796. *
  1797. * This macro copies a single simple value from kernel space to user
  1798. * space. It supports simple types like char and int, but not larger
  1799. @@ -200,7 +204,8 @@
  1800. * @x: Variable to store result.
  1801. * @ptr: Source address, in user space.
  1802. *
  1803. - * Context: User context only. This function may sleep.
  1804. + * Context: User context only. This function may sleep if pagefaults are
  1805. + * enabled.
  1806. *
  1807. * This macro copies a single simple variable from user space to kernel
  1808. * space. It supports simple types like char and int, but not larger
  1809. @@ -499,7 +504,8 @@
  1810. * @x: Value to copy to user space.
  1811. * @ptr: Destination address, in user space.
  1812. *
  1813. - * Context: User context only. This function may sleep.
  1814. + * Context: User context only. This function may sleep if pagefaults are
  1815. + * enabled.
  1816. *
  1817. * This macro copies a single simple value from kernel space to user
  1818. * space. It supports simple types like char and int, but not larger
  1819. @@ -518,7 +524,8 @@
  1820. * @x: Variable to store result.
  1821. * @ptr: Source address, in user space.
  1822. *
  1823. - * Context: User context only. This function may sleep.
  1824. + * Context: User context only. This function may sleep if pagefaults are
  1825. + * enabled.
  1826. *
  1827. * This macro copies a single simple variable from user space to kernel
  1828. * space. It supports simple types like char and int, but not larger
  1829. @@ -538,7 +545,8 @@
  1830. * @x: Value to copy to user space.
  1831. * @ptr: Destination address, in user space.
  1832. *
  1833. - * Context: User context only. This function may sleep.
  1834. + * Context: User context only. This function may sleep if pagefaults are
  1835. + * enabled.
  1836. *
  1837. * This macro copies a single simple value from kernel space to user
  1838. * space. It supports simple types like char and int, but not larger
  1839. @@ -560,7 +568,8 @@
  1840. * @x: Variable to store result.
  1841. * @ptr: Source address, in user space.
  1842. *
  1843. - * Context: User context only. This function may sleep.
  1844. + * Context: User context only. This function may sleep if pagefaults are
  1845. + * enabled.
  1846. *
  1847. * This macro copies a single simple variable from user space to kernel
  1848. * space. It supports simple types like char and int, but not larger
  1849. @@ -816,7 +825,8 @@
  1850. * @from: Source address, in kernel space.
  1851. * @n: Number of bytes to copy.
  1852. *
  1853. - * Context: User context only. This function may sleep.
  1854. + * Context: User context only. This function may sleep if pagefaults are
  1855. + * enabled.
  1856. *
  1857. * Copy data from kernel space to user space. Caller must check
  1858. * the specified block with access_ok() before calling this function.
  1859. @@ -889,7 +899,8 @@
  1860. * @from: Source address, in kernel space.
  1861. * @n: Number of bytes to copy.
  1862. *
  1863. - * Context: User context only. This function may sleep.
  1864. + * Context: User context only. This function may sleep if pagefaults are
  1865. + * enabled.
  1866. *
  1867. * Copy data from kernel space to user space.
  1868. *
  1869. @@ -1076,7 +1087,8 @@
  1870. * @from: Source address, in user space.
  1871. * @n: Number of bytes to copy.
  1872. *
  1873. - * Context: User context only. This function may sleep.
  1874. + * Context: User context only. This function may sleep if pagefaults are
  1875. + * enabled.
  1876. *
  1877. * Copy data from user space to kernel space. Caller must check
  1878. * the specified block with access_ok() before calling this function.
  1879. @@ -1108,7 +1120,8 @@
  1880. * @from: Source address, in user space.
  1881. * @n: Number of bytes to copy.
  1882. *
  1883. - * Context: User context only. This function may sleep.
  1884. + * Context: User context only. This function may sleep if pagefaults are
  1885. + * enabled.
  1886. *
  1887. * Copy data from user space to kernel space.
  1888. *
  1889. @@ -1332,7 +1345,8 @@
  1890. * strlen_user: - Get the size of a string in user space.
  1891. * @str: The string to measure.
  1892. *
  1893. - * Context: User context only. This function may sleep.
  1894. + * Context: User context only. This function may sleep if pagefaults are
  1895. + * enabled.
  1896. *
  1897. * Get the size of a NUL-terminated string in user space.
  1898. *
  1899. @@ -1401,7 +1415,8 @@
  1900. * strnlen_user: - Get the size of a string in user space.
  1901. * @str: The string to measure.
  1902. *
  1903. - * Context: User context only. This function may sleep.
  1904. + * Context: User context only. This function may sleep if pagefaults are
  1905. + * enabled.
  1906. *
  1907. * Get the size of a NUL-terminated string in user space.
  1908. *
  1909. diff -Nur linux-4.1.39.orig/arch/mips/Kconfig linux-4.1.39/arch/mips/Kconfig
  1910. --- linux-4.1.39.orig/arch/mips/Kconfig 2017-03-13 21:04:36.000000000 +0100
  1911. +++ linux-4.1.39/arch/mips/Kconfig 2017-04-18 17:56:30.557394959 +0200
  1912. @@ -2367,7 +2367,7 @@
  1913. #
  1914. config HIGHMEM
  1915. bool "High Memory Support"
  1916. - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA
  1917. + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL
  1918. config CPU_SUPPORTS_HIGHMEM
  1919. bool
  1920. diff -Nur linux-4.1.39.orig/arch/mips/kernel/signal-common.h linux-4.1.39/arch/mips/kernel/signal-common.h
  1921. --- linux-4.1.39.orig/arch/mips/kernel/signal-common.h 2017-03-13 21:04:36.000000000 +0100
  1922. +++ linux-4.1.39/arch/mips/kernel/signal-common.h 2017-04-18 17:56:30.557394959 +0200
  1923. @@ -28,12 +28,7 @@
  1924. extern int fpcsr_pending(unsigned int __user *fpcsr);
  1925. /* Make sure we will not lose FPU ownership */
  1926. -#ifdef CONFIG_PREEMPT
  1927. -#define lock_fpu_owner() preempt_disable()
  1928. -#define unlock_fpu_owner() preempt_enable()
  1929. -#else
  1930. -#define lock_fpu_owner() pagefault_disable()
  1931. -#define unlock_fpu_owner() pagefault_enable()
  1932. -#endif
  1933. +#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
  1934. +#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })
  1935. #endif /* __SIGNAL_COMMON_H */
  1936. diff -Nur linux-4.1.39.orig/arch/mips/mm/fault.c linux-4.1.39/arch/mips/mm/fault.c
  1937. --- linux-4.1.39.orig/arch/mips/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  1938. +++ linux-4.1.39/arch/mips/mm/fault.c 2017-04-18 17:56:30.557394959 +0200
  1939. @@ -21,10 +21,10 @@
  1940. #include <linux/module.h>
  1941. #include <linux/kprobes.h>
  1942. #include <linux/perf_event.h>
  1943. +#include <linux/uaccess.h>
  1944. #include <asm/branch.h>
  1945. #include <asm/mmu_context.h>
  1946. -#include <asm/uaccess.h>
  1947. #include <asm/ptrace.h>
  1948. #include <asm/highmem.h> /* For VMALLOC_END */
  1949. #include <linux/kdebug.h>
  1950. @@ -94,7 +94,7 @@
  1951. * If we're in an interrupt or have no user
  1952. * context, we must not take the fault..
  1953. */
  1954. - if (in_atomic() || !mm)
  1955. + if (faulthandler_disabled() || !mm)
  1956. goto bad_area_nosemaphore;
  1957. if (user_mode(regs))
  1958. diff -Nur linux-4.1.39.orig/arch/mips/mm/highmem.c linux-4.1.39/arch/mips/mm/highmem.c
  1959. --- linux-4.1.39.orig/arch/mips/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  1960. +++ linux-4.1.39/arch/mips/mm/highmem.c 2017-04-18 17:56:30.557394959 +0200
  1961. @@ -47,7 +47,7 @@
  1962. unsigned long vaddr;
  1963. int idx, type;
  1964. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  1965. + preempt_disable();
  1966. pagefault_disable();
  1967. if (!PageHighMem(page))
  1968. return page_address(page);
  1969. @@ -72,6 +72,7 @@
  1970. if (vaddr < FIXADDR_START) { // FIXME
  1971. pagefault_enable();
  1972. + preempt_enable();
  1973. return;
  1974. }
  1975. @@ -92,6 +93,7 @@
  1976. #endif
  1977. kmap_atomic_idx_pop();
  1978. pagefault_enable();
  1979. + preempt_enable();
  1980. }
  1981. EXPORT_SYMBOL(__kunmap_atomic);
  1982. @@ -104,6 +106,7 @@
  1983. unsigned long vaddr;
  1984. int idx, type;
  1985. + preempt_disable();
  1986. pagefault_disable();
  1987. type = kmap_atomic_idx_push();
  1988. diff -Nur linux-4.1.39.orig/arch/mips/mm/init.c linux-4.1.39/arch/mips/mm/init.c
  1989. --- linux-4.1.39.orig/arch/mips/mm/init.c 2017-03-13 21:04:36.000000000 +0100
  1990. +++ linux-4.1.39/arch/mips/mm/init.c 2017-04-18 17:56:30.557394959 +0200
  1991. @@ -90,6 +90,7 @@
  1992. BUG_ON(Page_dcache_dirty(page));
  1993. + preempt_disable();
  1994. pagefault_disable();
  1995. idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
  1996. idx += in_interrupt() ? FIX_N_COLOURS : 0;
  1997. @@ -152,6 +153,7 @@
  1998. write_c0_entryhi(old_ctx);
  1999. local_irq_restore(flags);
  2000. pagefault_enable();
  2001. + preempt_enable();
  2002. }
  2003. void copy_user_highpage(struct page *to, struct page *from,
  2004. diff -Nur linux-4.1.39.orig/arch/mn10300/include/asm/highmem.h linux-4.1.39/arch/mn10300/include/asm/highmem.h
  2005. --- linux-4.1.39.orig/arch/mn10300/include/asm/highmem.h 2017-03-13 21:04:36.000000000 +0100
  2006. +++ linux-4.1.39/arch/mn10300/include/asm/highmem.h 2017-04-18 17:56:30.557394959 +0200
  2007. @@ -75,6 +75,7 @@
  2008. unsigned long vaddr;
  2009. int idx, type;
  2010. + preempt_disable();
  2011. pagefault_disable();
  2012. if (page < highmem_start_page)
  2013. return page_address(page);
  2014. @@ -98,6 +99,7 @@
  2015. if (vaddr < FIXADDR_START) { /* FIXME */
  2016. pagefault_enable();
  2017. + preempt_enable();
  2018. return;
  2019. }
  2020. @@ -122,6 +124,7 @@
  2021. kmap_atomic_idx_pop();
  2022. pagefault_enable();
  2023. + preempt_enable();
  2024. }
  2025. #endif /* __KERNEL__ */
  2026. diff -Nur linux-4.1.39.orig/arch/mn10300/mm/fault.c linux-4.1.39/arch/mn10300/mm/fault.c
  2027. --- linux-4.1.39.orig/arch/mn10300/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2028. +++ linux-4.1.39/arch/mn10300/mm/fault.c 2017-04-18 17:56:30.557394959 +0200
  2029. @@ -23,8 +23,8 @@
  2030. #include <linux/interrupt.h>
  2031. #include <linux/init.h>
  2032. #include <linux/vt_kern.h> /* For unblank_screen() */
  2033. +#include <linux/uaccess.h>
  2034. -#include <asm/uaccess.h>
  2035. #include <asm/pgalloc.h>
  2036. #include <asm/hardirq.h>
  2037. #include <asm/cpu-regs.h>
  2038. @@ -168,7 +168,7 @@
  2039. * If we're in an interrupt or have no user
  2040. * context, we must not take the fault..
  2041. */
  2042. - if (in_atomic() || !mm)
  2043. + if (faulthandler_disabled() || !mm)
  2044. goto no_context;
  2045. if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
  2046. diff -Nur linux-4.1.39.orig/arch/nios2/mm/fault.c linux-4.1.39/arch/nios2/mm/fault.c
  2047. --- linux-4.1.39.orig/arch/nios2/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2048. +++ linux-4.1.39/arch/nios2/mm/fault.c 2017-04-18 17:56:30.557394959 +0200
  2049. @@ -77,7 +77,7 @@
  2050. * If we're in an interrupt or have no user
  2051. * context, we must not take the fault..
  2052. */
  2053. - if (in_atomic() || !mm)
  2054. + if (faulthandler_disabled() || !mm)
  2055. goto bad_area_nosemaphore;
  2056. if (user_mode(regs))
  2057. diff -Nur linux-4.1.39.orig/arch/parisc/include/asm/cacheflush.h linux-4.1.39/arch/parisc/include/asm/cacheflush.h
  2058. --- linux-4.1.39.orig/arch/parisc/include/asm/cacheflush.h 2017-03-13 21:04:36.000000000 +0100
  2059. +++ linux-4.1.39/arch/parisc/include/asm/cacheflush.h 2017-04-18 17:56:30.557394959 +0200
  2060. @@ -142,6 +142,7 @@
  2061. static inline void *kmap_atomic(struct page *page)
  2062. {
  2063. + preempt_disable();
  2064. pagefault_disable();
  2065. return page_address(page);
  2066. }
  2067. @@ -150,6 +151,7 @@
  2068. {
  2069. flush_kernel_dcache_page_addr(addr);
  2070. pagefault_enable();
  2071. + preempt_enable();
  2072. }
  2073. #define kmap_atomic_prot(page, prot) kmap_atomic(page)
  2074. diff -Nur linux-4.1.39.orig/arch/parisc/kernel/traps.c linux-4.1.39/arch/parisc/kernel/traps.c
  2075. --- linux-4.1.39.orig/arch/parisc/kernel/traps.c 2017-03-13 21:04:36.000000000 +0100
  2076. +++ linux-4.1.39/arch/parisc/kernel/traps.c 2017-04-18 17:56:30.557394959 +0200
  2077. @@ -26,9 +26,9 @@
  2078. #include <linux/console.h>
  2079. #include <linux/bug.h>
  2080. #include <linux/ratelimit.h>
  2081. +#include <linux/uaccess.h>
  2082. #include <asm/assembly.h>
  2083. -#include <asm/uaccess.h>
  2084. #include <asm/io.h>
  2085. #include <asm/irq.h>
  2086. #include <asm/traps.h>
  2087. @@ -796,7 +796,7 @@
  2088. * unless pagefault_disable() was called before.
  2089. */
  2090. - if (fault_space == 0 && !in_atomic())
  2091. + if (fault_space == 0 && !faulthandler_disabled())
  2092. {
  2093. /* Clean up and return if in exception table. */
  2094. if (fixup_exception(regs))
  2095. diff -Nur linux-4.1.39.orig/arch/parisc/mm/fault.c linux-4.1.39/arch/parisc/mm/fault.c
  2096. --- linux-4.1.39.orig/arch/parisc/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2097. +++ linux-4.1.39/arch/parisc/mm/fault.c 2017-04-18 17:56:30.557394959 +0200
  2098. @@ -15,8 +15,8 @@
  2099. #include <linux/sched.h>
  2100. #include <linux/interrupt.h>
  2101. #include <linux/module.h>
  2102. +#include <linux/uaccess.h>
  2103. -#include <asm/uaccess.h>
  2104. #include <asm/traps.h>
  2105. /* Various important other fields */
  2106. @@ -208,7 +208,7 @@
  2107. int fault;
  2108. unsigned int flags;
  2109. - if (in_atomic())
  2110. + if (pagefault_disabled())
  2111. goto no_context;
  2112. tsk = current;
  2113. diff -Nur linux-4.1.39.orig/arch/powerpc/include/asm/kvm_host.h linux-4.1.39/arch/powerpc/include/asm/kvm_host.h
  2114. --- linux-4.1.39.orig/arch/powerpc/include/asm/kvm_host.h 2017-03-13 21:04:36.000000000 +0100
  2115. +++ linux-4.1.39/arch/powerpc/include/asm/kvm_host.h 2017-04-18 17:56:30.557394959 +0200
  2116. @@ -280,7 +280,7 @@
  2117. u8 in_guest;
  2118. struct list_head runnable_threads;
  2119. spinlock_t lock;
  2120. - wait_queue_head_t wq;
  2121. + struct swait_head wq;
  2122. spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
  2123. u64 stolen_tb;
  2124. u64 preempt_tb;
  2125. @@ -614,7 +614,7 @@
  2126. u8 prodded;
  2127. u32 last_inst;
  2128. - wait_queue_head_t *wqp;
  2129. + struct swait_head *wqp;
  2130. struct kvmppc_vcore *vcore;
  2131. int ret;
  2132. int trap;
  2133. diff -Nur linux-4.1.39.orig/arch/powerpc/include/asm/thread_info.h linux-4.1.39/arch/powerpc/include/asm/thread_info.h
  2134. --- linux-4.1.39.orig/arch/powerpc/include/asm/thread_info.h 2017-03-13 21:04:36.000000000 +0100
  2135. +++ linux-4.1.39/arch/powerpc/include/asm/thread_info.h 2017-04-18 17:56:30.557394959 +0200
  2136. @@ -42,6 +42,8 @@
  2137. int cpu; /* cpu we're on */
  2138. int preempt_count; /* 0 => preemptable,
  2139. <0 => BUG */
  2140. + int preempt_lazy_count; /* 0 => preemptable,
  2141. + <0 => BUG */
  2142. unsigned long local_flags; /* private flags for thread */
  2143. /* low level flags - has atomic operations done on it */
  2144. @@ -82,8 +84,7 @@
  2145. #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
  2146. #define TIF_SIGPENDING 1 /* signal pending */
  2147. #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
  2148. -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
  2149. - TIF_NEED_RESCHED */
  2150. +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */
  2151. #define TIF_32BIT 4 /* 32 bit binary */
  2152. #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
  2153. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  2154. @@ -101,6 +102,8 @@
  2155. #if defined(CONFIG_PPC64)
  2156. #define TIF_ELF2ABI 18 /* function descriptors must die! */
  2157. #endif
  2158. +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling
  2159. + TIF_NEED_RESCHED */
  2160. /* as above, but as bit values */
  2161. #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
  2162. @@ -119,14 +122,16 @@
  2163. #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
  2164. #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
  2165. #define _TIF_NOHZ (1<<TIF_NOHZ)
  2166. +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
  2167. #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
  2168. _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
  2169. _TIF_NOHZ)
  2170. #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
  2171. _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
  2172. - _TIF_RESTORE_TM)
  2173. + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY)
  2174. #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
  2175. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  2176. /* Bits in local_flags */
  2177. /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
  2178. diff -Nur linux-4.1.39.orig/arch/powerpc/Kconfig linux-4.1.39/arch/powerpc/Kconfig
  2179. --- linux-4.1.39.orig/arch/powerpc/Kconfig 2017-03-13 21:04:36.000000000 +0100
  2180. +++ linux-4.1.39/arch/powerpc/Kconfig 2017-04-18 17:56:30.557394959 +0200
  2181. @@ -60,10 +60,11 @@
  2182. config RWSEM_GENERIC_SPINLOCK
  2183. bool
  2184. + default y if PREEMPT_RT_FULL
  2185. config RWSEM_XCHGADD_ALGORITHM
  2186. bool
  2187. - default y
  2188. + default y if !PREEMPT_RT_FULL
  2189. config GENERIC_LOCKBREAK
  2190. bool
  2191. @@ -138,6 +139,7 @@
  2192. select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  2193. select GENERIC_STRNCPY_FROM_USER
  2194. select GENERIC_STRNLEN_USER
  2195. + select HAVE_PREEMPT_LAZY
  2196. select HAVE_MOD_ARCH_SPECIFIC
  2197. select MODULES_USE_ELF_RELA
  2198. select CLONE_BACKWARDS
  2199. @@ -312,7 +314,7 @@
  2200. config HIGHMEM
  2201. bool "High memory support"
  2202. - depends on PPC32
  2203. + depends on PPC32 && !PREEMPT_RT_FULL
  2204. source kernel/Kconfig.hz
  2205. source kernel/Kconfig.preempt
  2206. diff -Nur linux-4.1.39.orig/arch/powerpc/kernel/asm-offsets.c linux-4.1.39/arch/powerpc/kernel/asm-offsets.c
  2207. --- linux-4.1.39.orig/arch/powerpc/kernel/asm-offsets.c 2017-03-13 21:04:36.000000000 +0100
  2208. +++ linux-4.1.39/arch/powerpc/kernel/asm-offsets.c 2017-04-18 17:56:30.557394959 +0200
  2209. @@ -160,6 +160,7 @@
  2210. DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
  2211. DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
  2212. DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
  2213. + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
  2214. DEFINE(TI_TASK, offsetof(struct thread_info, task));
  2215. DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
  2216. diff -Nur linux-4.1.39.orig/arch/powerpc/kernel/entry_32.S linux-4.1.39/arch/powerpc/kernel/entry_32.S
  2217. --- linux-4.1.39.orig/arch/powerpc/kernel/entry_32.S 2017-03-13 21:04:36.000000000 +0100
  2218. +++ linux-4.1.39/arch/powerpc/kernel/entry_32.S 2017-04-18 17:56:30.557394959 +0200
  2219. @@ -813,7 +813,14 @@
  2220. cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  2221. bne restore
  2222. andi. r8,r8,_TIF_NEED_RESCHED
  2223. + bne+ 1f
  2224. + lwz r0,TI_PREEMPT_LAZY(r9)
  2225. + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
  2226. + bne restore
  2227. + lwz r0,TI_FLAGS(r9)
  2228. + andi. r0,r0,_TIF_NEED_RESCHED_LAZY
  2229. beq+ restore
  2230. +1:
  2231. lwz r3,_MSR(r1)
  2232. andi. r0,r3,MSR_EE /* interrupts off? */
  2233. beq restore /* don't schedule if so */
  2234. @@ -824,11 +831,11 @@
  2235. */
  2236. bl trace_hardirqs_off
  2237. #endif
  2238. -1: bl preempt_schedule_irq
  2239. +2: bl preempt_schedule_irq
  2240. CURRENT_THREAD_INFO(r9, r1)
  2241. lwz r3,TI_FLAGS(r9)
  2242. - andi. r0,r3,_TIF_NEED_RESCHED
  2243. - bne- 1b
  2244. + andi. r0,r3,_TIF_NEED_RESCHED_MASK
  2245. + bne- 2b
  2246. #ifdef CONFIG_TRACE_IRQFLAGS
  2247. /* And now, to properly rebalance the above, we tell lockdep they
  2248. * are being turned back on, which will happen when we return
  2249. @@ -1149,7 +1156,7 @@
  2250. #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
  2251. do_work: /* r10 contains MSR_KERNEL here */
  2252. - andi. r0,r9,_TIF_NEED_RESCHED
  2253. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  2254. beq do_user_signal
  2255. do_resched: /* r10 contains MSR_KERNEL here */
  2256. @@ -1170,7 +1177,7 @@
  2257. MTMSRD(r10) /* disable interrupts */
  2258. CURRENT_THREAD_INFO(r9, r1)
  2259. lwz r9,TI_FLAGS(r9)
  2260. - andi. r0,r9,_TIF_NEED_RESCHED
  2261. + andi. r0,r9,_TIF_NEED_RESCHED_MASK
  2262. bne- do_resched
  2263. andi. r0,r9,_TIF_USER_WORK_MASK
  2264. beq restore_user
  2265. diff -Nur linux-4.1.39.orig/arch/powerpc/kernel/entry_64.S linux-4.1.39/arch/powerpc/kernel/entry_64.S
  2266. --- linux-4.1.39.orig/arch/powerpc/kernel/entry_64.S 2017-03-13 21:04:36.000000000 +0100
  2267. +++ linux-4.1.39/arch/powerpc/kernel/entry_64.S 2017-04-18 17:56:30.561395114 +0200
  2268. @@ -636,7 +636,7 @@
  2269. #else
  2270. beq restore
  2271. #endif
  2272. -1: andi. r0,r4,_TIF_NEED_RESCHED
  2273. +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK
  2274. beq 2f
  2275. bl restore_interrupts
  2276. SCHEDULE_USER
  2277. @@ -698,10 +698,18 @@
  2278. #ifdef CONFIG_PREEMPT
  2279. /* Check if we need to preempt */
  2280. + lwz r8,TI_PREEMPT(r9)
  2281. + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */
  2282. + bne restore
  2283. andi. r0,r4,_TIF_NEED_RESCHED
  2284. + bne+ check_count
  2285. +
  2286. + andi. r0,r4,_TIF_NEED_RESCHED_LAZY
  2287. beq+ restore
  2288. + lwz r8,TI_PREEMPT_LAZY(r9)
  2289. +
  2290. /* Check that preempt_count() == 0 and interrupts are enabled */
  2291. - lwz r8,TI_PREEMPT(r9)
  2292. +check_count:
  2293. cmpwi cr1,r8,0
  2294. ld r0,SOFTE(r1)
  2295. cmpdi r0,0
  2296. @@ -718,7 +726,7 @@
  2297. /* Re-test flags and eventually loop */
  2298. CURRENT_THREAD_INFO(r9, r1)
  2299. ld r4,TI_FLAGS(r9)
  2300. - andi. r0,r4,_TIF_NEED_RESCHED
  2301. + andi. r0,r4,_TIF_NEED_RESCHED_MASK
  2302. bne 1b
  2303. /*
  2304. diff -Nur linux-4.1.39.orig/arch/powerpc/kernel/irq.c linux-4.1.39/arch/powerpc/kernel/irq.c
  2305. --- linux-4.1.39.orig/arch/powerpc/kernel/irq.c 2017-03-13 21:04:36.000000000 +0100
  2306. +++ linux-4.1.39/arch/powerpc/kernel/irq.c 2017-04-18 17:56:30.561395114 +0200
  2307. @@ -614,6 +614,7 @@
  2308. }
  2309. }
  2310. +#ifndef CONFIG_PREEMPT_RT_FULL
  2311. void do_softirq_own_stack(void)
  2312. {
  2313. struct thread_info *curtp, *irqtp;
  2314. @@ -631,6 +632,7 @@
  2315. if (irqtp->flags)
  2316. set_bits(irqtp->flags, &curtp->flags);
  2317. }
  2318. +#endif
  2319. irq_hw_number_t virq_to_hw(unsigned int virq)
  2320. {
  2321. diff -Nur linux-4.1.39.orig/arch/powerpc/kernel/misc_32.S linux-4.1.39/arch/powerpc/kernel/misc_32.S
  2322. --- linux-4.1.39.orig/arch/powerpc/kernel/misc_32.S 2017-03-13 21:04:36.000000000 +0100
  2323. +++ linux-4.1.39/arch/powerpc/kernel/misc_32.S 2017-04-18 17:56:30.561395114 +0200
  2324. @@ -40,6 +40,7 @@
  2325. * We store the saved ksp_limit in the unused part
  2326. * of the STACK_FRAME_OVERHEAD
  2327. */
  2328. +#ifndef CONFIG_PREEMPT_RT_FULL
  2329. _GLOBAL(call_do_softirq)
  2330. mflr r0
  2331. stw r0,4(r1)
  2332. @@ -56,6 +57,7 @@
  2333. stw r10,THREAD+KSP_LIMIT(r2)
  2334. mtlr r0
  2335. blr
  2336. +#endif
  2337. /*
  2338. * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
  2339. diff -Nur linux-4.1.39.orig/arch/powerpc/kernel/misc_64.S linux-4.1.39/arch/powerpc/kernel/misc_64.S
  2340. --- linux-4.1.39.orig/arch/powerpc/kernel/misc_64.S 2017-03-13 21:04:36.000000000 +0100
  2341. +++ linux-4.1.39/arch/powerpc/kernel/misc_64.S 2017-04-18 17:56:30.561395114 +0200
  2342. @@ -29,6 +29,7 @@
  2343. .text
  2344. +#ifndef CONFIG_PREEMPT_RT_FULL
  2345. _GLOBAL(call_do_softirq)
  2346. mflr r0
  2347. std r0,16(r1)
  2348. @@ -39,6 +40,7 @@
  2349. ld r0,16(r1)
  2350. mtlr r0
  2351. blr
  2352. +#endif
  2353. _GLOBAL(call_do_irq)
  2354. mflr r0
  2355. diff -Nur linux-4.1.39.orig/arch/powerpc/kvm/book3s_hv.c linux-4.1.39/arch/powerpc/kvm/book3s_hv.c
  2356. --- linux-4.1.39.orig/arch/powerpc/kvm/book3s_hv.c 2017-03-13 21:04:36.000000000 +0100
  2357. +++ linux-4.1.39/arch/powerpc/kvm/book3s_hv.c 2017-04-18 17:56:30.561395114 +0200
  2358. @@ -115,11 +115,11 @@
  2359. static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  2360. {
  2361. int cpu = vcpu->cpu;
  2362. - wait_queue_head_t *wqp;
  2363. + struct swait_head *wqp;
  2364. wqp = kvm_arch_vcpu_wq(vcpu);
  2365. - if (waitqueue_active(wqp)) {
  2366. - wake_up_interruptible(wqp);
  2367. + if (swaitqueue_active(wqp)) {
  2368. + swait_wake_interruptible(wqp);
  2369. ++vcpu->stat.halt_wakeup;
  2370. }
  2371. @@ -692,8 +692,8 @@
  2372. tvcpu->arch.prodded = 1;
  2373. smp_mb();
  2374. if (vcpu->arch.ceded) {
  2375. - if (waitqueue_active(&vcpu->wq)) {
  2376. - wake_up_interruptible(&vcpu->wq);
  2377. + if (swaitqueue_active(&vcpu->wq)) {
  2378. + swait_wake_interruptible(&vcpu->wq);
  2379. vcpu->stat.halt_wakeup++;
  2380. }
  2381. }
  2382. @@ -1438,7 +1438,7 @@
  2383. INIT_LIST_HEAD(&vcore->runnable_threads);
  2384. spin_lock_init(&vcore->lock);
  2385. spin_lock_init(&vcore->stoltb_lock);
  2386. - init_waitqueue_head(&vcore->wq);
  2387. + init_swait_head(&vcore->wq);
  2388. vcore->preempt_tb = TB_NIL;
  2389. vcore->lpcr = kvm->arch.lpcr;
  2390. vcore->first_vcpuid = core * threads_per_subcore;
  2391. @@ -2085,10 +2085,9 @@
  2392. {
  2393. struct kvm_vcpu *vcpu;
  2394. int do_sleep = 1;
  2395. + DEFINE_SWAITER(wait);
  2396. - DEFINE_WAIT(wait);
  2397. -
  2398. - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
  2399. + swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE);
  2400. /*
  2401. * Check one last time for pending exceptions and ceded state after
  2402. @@ -2102,7 +2101,7 @@
  2403. }
  2404. if (!do_sleep) {
  2405. - finish_wait(&vc->wq, &wait);
  2406. + swait_finish(&vc->wq, &wait);
  2407. return;
  2408. }
  2409. @@ -2110,7 +2109,7 @@
  2410. trace_kvmppc_vcore_blocked(vc, 0);
  2411. spin_unlock(&vc->lock);
  2412. schedule();
  2413. - finish_wait(&vc->wq, &wait);
  2414. + swait_finish(&vc->wq, &wait);
  2415. spin_lock(&vc->lock);
  2416. vc->vcore_state = VCORE_INACTIVE;
  2417. trace_kvmppc_vcore_blocked(vc, 1);
  2418. @@ -2154,7 +2153,7 @@
  2419. kvmppc_start_thread(vcpu);
  2420. trace_kvm_guest_enter(vcpu);
  2421. } else if (vc->vcore_state == VCORE_SLEEPING) {
  2422. - wake_up(&vc->wq);
  2423. + swait_wake(&vc->wq);
  2424. }
  2425. }
  2426. diff -Nur linux-4.1.39.orig/arch/powerpc/kvm/Kconfig linux-4.1.39/arch/powerpc/kvm/Kconfig
  2427. --- linux-4.1.39.orig/arch/powerpc/kvm/Kconfig 2017-03-13 21:04:36.000000000 +0100
  2428. +++ linux-4.1.39/arch/powerpc/kvm/Kconfig 2017-04-18 17:56:30.561395114 +0200
  2429. @@ -172,6 +172,7 @@
  2430. config KVM_MPIC
  2431. bool "KVM in-kernel MPIC emulation"
  2432. depends on KVM && E500
  2433. + depends on !PREEMPT_RT_FULL
  2434. select HAVE_KVM_IRQCHIP
  2435. select HAVE_KVM_IRQFD
  2436. select HAVE_KVM_IRQ_ROUTING
  2437. diff -Nur linux-4.1.39.orig/arch/powerpc/mm/fault.c linux-4.1.39/arch/powerpc/mm/fault.c
  2438. --- linux-4.1.39.orig/arch/powerpc/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2439. +++ linux-4.1.39/arch/powerpc/mm/fault.c 2017-04-18 17:56:30.561395114 +0200
  2440. @@ -33,13 +33,13 @@
  2441. #include <linux/ratelimit.h>
  2442. #include <linux/context_tracking.h>
  2443. #include <linux/hugetlb.h>
  2444. +#include <linux/uaccess.h>
  2445. #include <asm/firmware.h>
  2446. #include <asm/page.h>
  2447. #include <asm/pgtable.h>
  2448. #include <asm/mmu.h>
  2449. #include <asm/mmu_context.h>
  2450. -#include <asm/uaccess.h>
  2451. #include <asm/tlbflush.h>
  2452. #include <asm/siginfo.h>
  2453. #include <asm/debug.h>
  2454. @@ -272,15 +272,16 @@
  2455. if (!arch_irq_disabled_regs(regs))
  2456. local_irq_enable();
  2457. - if (in_atomic() || mm == NULL) {
  2458. + if (faulthandler_disabled() || mm == NULL) {
  2459. if (!user_mode(regs)) {
  2460. rc = SIGSEGV;
  2461. goto bail;
  2462. }
  2463. - /* in_atomic() in user mode is really bad,
  2464. + /* faulthandler_disabled() in user mode is really bad,
  2465. as is current->mm == NULL. */
  2466. printk(KERN_EMERG "Page fault in user mode with "
  2467. - "in_atomic() = %d mm = %p\n", in_atomic(), mm);
  2468. + "faulthandler_disabled() = %d mm = %p\n",
  2469. + faulthandler_disabled(), mm);
  2470. printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
  2471. regs->nip, regs->msr);
  2472. die("Weird page fault", regs, SIGSEGV);
  2473. diff -Nur linux-4.1.39.orig/arch/powerpc/mm/highmem.c linux-4.1.39/arch/powerpc/mm/highmem.c
  2474. --- linux-4.1.39.orig/arch/powerpc/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  2475. +++ linux-4.1.39/arch/powerpc/mm/highmem.c 2017-04-18 17:56:30.561395114 +0200
  2476. @@ -34,7 +34,7 @@
  2477. unsigned long vaddr;
  2478. int idx, type;
  2479. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  2480. + preempt_disable();
  2481. pagefault_disable();
  2482. if (!PageHighMem(page))
  2483. return page_address(page);
  2484. @@ -59,6 +59,7 @@
  2485. if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
  2486. pagefault_enable();
  2487. + preempt_enable();
  2488. return;
  2489. }
  2490. @@ -82,5 +83,6 @@
  2491. kmap_atomic_idx_pop();
  2492. pagefault_enable();
  2493. + preempt_enable();
  2494. }
  2495. EXPORT_SYMBOL(__kunmap_atomic);
  2496. diff -Nur linux-4.1.39.orig/arch/powerpc/platforms/ps3/device-init.c linux-4.1.39/arch/powerpc/platforms/ps3/device-init.c
  2497. --- linux-4.1.39.orig/arch/powerpc/platforms/ps3/device-init.c 2017-03-13 21:04:36.000000000 +0100
  2498. +++ linux-4.1.39/arch/powerpc/platforms/ps3/device-init.c 2017-04-18 17:56:30.561395114 +0200
  2499. @@ -752,7 +752,7 @@
  2500. }
  2501. pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
  2502. - res = wait_event_interruptible(dev->done.wait,
  2503. + res = swait_event_interruptible(dev->done.wait,
  2504. dev->done.done || kthread_should_stop());
  2505. if (kthread_should_stop())
  2506. res = -EINTR;
  2507. diff -Nur linux-4.1.39.orig/arch/s390/include/asm/kvm_host.h linux-4.1.39/arch/s390/include/asm/kvm_host.h
  2508. --- linux-4.1.39.orig/arch/s390/include/asm/kvm_host.h 2017-03-13 21:04:36.000000000 +0100
  2509. +++ linux-4.1.39/arch/s390/include/asm/kvm_host.h 2017-04-18 17:56:30.561395114 +0200
  2510. @@ -419,7 +419,7 @@
  2511. struct kvm_s390_local_interrupt {
  2512. spinlock_t lock;
  2513. struct kvm_s390_float_interrupt *float_int;
  2514. - wait_queue_head_t *wq;
  2515. + struct swait_head *wq;
  2516. atomic_t *cpuflags;
  2517. DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
  2518. struct kvm_s390_irq_payload irq;
  2519. diff -Nur linux-4.1.39.orig/arch/s390/include/asm/uaccess.h linux-4.1.39/arch/s390/include/asm/uaccess.h
  2520. --- linux-4.1.39.orig/arch/s390/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  2521. +++ linux-4.1.39/arch/s390/include/asm/uaccess.h 2017-04-18 17:56:30.561395114 +0200
  2522. @@ -98,7 +98,8 @@
  2523. * @from: Source address, in user space.
  2524. * @n: Number of bytes to copy.
  2525. *
  2526. - * Context: User context only. This function may sleep.
  2527. + * Context: User context only. This function may sleep if pagefaults are
  2528. + * enabled.
  2529. *
  2530. * Copy data from user space to kernel space. Caller must check
  2531. * the specified block with access_ok() before calling this function.
  2532. @@ -118,7 +119,8 @@
  2533. * @from: Source address, in kernel space.
  2534. * @n: Number of bytes to copy.
  2535. *
  2536. - * Context: User context only. This function may sleep.
  2537. + * Context: User context only. This function may sleep if pagefaults are
  2538. + * enabled.
  2539. *
  2540. * Copy data from kernel space to user space. Caller must check
  2541. * the specified block with access_ok() before calling this function.
  2542. @@ -264,7 +266,8 @@
  2543. * @from: Source address, in kernel space.
  2544. * @n: Number of bytes to copy.
  2545. *
  2546. - * Context: User context only. This function may sleep.
  2547. + * Context: User context only. This function may sleep if pagefaults are
  2548. + * enabled.
  2549. *
  2550. * Copy data from kernel space to user space.
  2551. *
  2552. @@ -290,7 +293,8 @@
  2553. * @from: Source address, in user space.
  2554. * @n: Number of bytes to copy.
  2555. *
  2556. - * Context: User context only. This function may sleep.
  2557. + * Context: User context only. This function may sleep if pagefaults are
  2558. + * enabled.
  2559. *
  2560. * Copy data from user space to kernel space.
  2561. *
  2562. @@ -348,7 +352,8 @@
  2563. * strlen_user: - Get the size of a string in user space.
  2564. * @str: The string to measure.
  2565. *
  2566. - * Context: User context only. This function may sleep.
  2567. + * Context: User context only. This function may sleep if pagefaults are
  2568. + * enabled.
  2569. *
  2570. * Get the size of a NUL-terminated string in user space.
  2571. *
  2572. diff -Nur linux-4.1.39.orig/arch/s390/kvm/interrupt.c linux-4.1.39/arch/s390/kvm/interrupt.c
  2573. --- linux-4.1.39.orig/arch/s390/kvm/interrupt.c 2017-03-13 21:04:36.000000000 +0100
  2574. +++ linux-4.1.39/arch/s390/kvm/interrupt.c 2017-04-18 17:56:30.561395114 +0200
  2575. @@ -875,13 +875,13 @@
  2576. void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
  2577. {
  2578. - if (waitqueue_active(&vcpu->wq)) {
  2579. + if (swaitqueue_active(&vcpu->wq)) {
  2580. /*
  2581. * The vcpu gave up the cpu voluntarily, mark it as a good
  2582. * yield-candidate.
  2583. */
  2584. vcpu->preempted = true;
  2585. - wake_up_interruptible(&vcpu->wq);
  2586. + swait_wake_interruptible(&vcpu->wq);
  2587. vcpu->stat.halt_wakeup++;
  2588. }
  2589. }
  2590. @@ -987,7 +987,7 @@
  2591. spin_lock(&li->lock);
  2592. irq.u.pgm.code = code;
  2593. __inject_prog(vcpu, &irq);
  2594. - BUG_ON(waitqueue_active(li->wq));
  2595. + BUG_ON(swaitqueue_active(li->wq));
  2596. spin_unlock(&li->lock);
  2597. return 0;
  2598. }
  2599. @@ -1006,7 +1006,7 @@
  2600. spin_lock(&li->lock);
  2601. irq.u.pgm = *pgm_info;
  2602. rc = __inject_prog(vcpu, &irq);
  2603. - BUG_ON(waitqueue_active(li->wq));
  2604. + BUG_ON(swaitqueue_active(li->wq));
  2605. spin_unlock(&li->lock);
  2606. return rc;
  2607. }
  2608. diff -Nur linux-4.1.39.orig/arch/s390/mm/fault.c linux-4.1.39/arch/s390/mm/fault.c
  2609. --- linux-4.1.39.orig/arch/s390/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2610. +++ linux-4.1.39/arch/s390/mm/fault.c 2017-04-18 17:56:30.561395114 +0200
  2611. @@ -399,7 +399,7 @@
  2612. * user context.
  2613. */
  2614. fault = VM_FAULT_BADCONTEXT;
  2615. - if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
  2616. + if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
  2617. goto out;
  2618. address = trans_exc_code & __FAIL_ADDR_MASK;
  2619. diff -Nur linux-4.1.39.orig/arch/score/include/asm/uaccess.h linux-4.1.39/arch/score/include/asm/uaccess.h
  2620. --- linux-4.1.39.orig/arch/score/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  2621. +++ linux-4.1.39/arch/score/include/asm/uaccess.h 2017-04-18 17:56:30.561395114 +0200
  2622. @@ -36,7 +36,8 @@
  2623. * @addr: User space pointer to start of block to check
  2624. * @size: Size of block to check
  2625. *
  2626. - * Context: User context only. This function may sleep.
  2627. + * Context: User context only. This function may sleep if pagefaults are
  2628. + * enabled.
  2629. *
  2630. * Checks if a pointer to a block of memory in user space is valid.
  2631. *
  2632. @@ -61,7 +62,8 @@
  2633. * @x: Value to copy to user space.
  2634. * @ptr: Destination address, in user space.
  2635. *
  2636. - * Context: User context only. This function may sleep.
  2637. + * Context: User context only. This function may sleep if pagefaults are
  2638. + * enabled.
  2639. *
  2640. * This macro copies a single simple value from kernel space to user
  2641. * space. It supports simple types like char and int, but not larger
  2642. @@ -79,7 +81,8 @@
  2643. * @x: Variable to store result.
  2644. * @ptr: Source address, in user space.
  2645. *
  2646. - * Context: User context only. This function may sleep.
  2647. + * Context: User context only. This function may sleep if pagefaults are
  2648. + * enabled.
  2649. *
  2650. * This macro copies a single simple variable from user space to kernel
  2651. * space. It supports simple types like char and int, but not larger
  2652. @@ -98,7 +101,8 @@
  2653. * @x: Value to copy to user space.
  2654. * @ptr: Destination address, in user space.
  2655. *
  2656. - * Context: User context only. This function may sleep.
  2657. + * Context: User context only. This function may sleep if pagefaults are
  2658. + * enabled.
  2659. *
  2660. * This macro copies a single simple value from kernel space to user
  2661. * space. It supports simple types like char and int, but not larger
  2662. @@ -119,7 +123,8 @@
  2663. * @x: Variable to store result.
  2664. * @ptr: Source address, in user space.
  2665. *
  2666. - * Context: User context only. This function may sleep.
  2667. + * Context: User context only. This function may sleep if pagefaults are
  2668. + * enabled.
  2669. *
  2670. * This macro copies a single simple variable from user space to kernel
  2671. * space. It supports simple types like char and int, but not larger
  2672. diff -Nur linux-4.1.39.orig/arch/score/mm/fault.c linux-4.1.39/arch/score/mm/fault.c
  2673. --- linux-4.1.39.orig/arch/score/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2674. +++ linux-4.1.39/arch/score/mm/fault.c 2017-04-18 17:56:30.561395114 +0200
  2675. @@ -34,6 +34,7 @@
  2676. #include <linux/string.h>
  2677. #include <linux/types.h>
  2678. #include <linux/ptrace.h>
  2679. +#include <linux/uaccess.h>
  2680. /*
  2681. * This routine handles page faults. It determines the address,
  2682. @@ -73,7 +74,7 @@
  2683. * If we're in an interrupt or have no user
  2684. * context, we must not take the fault..
  2685. */
  2686. - if (in_atomic() || !mm)
  2687. + if (pagefault_disabled() || !mm)
  2688. goto bad_area_nosemaphore;
  2689. if (user_mode(regs))
  2690. diff -Nur linux-4.1.39.orig/arch/sh/kernel/irq.c linux-4.1.39/arch/sh/kernel/irq.c
  2691. --- linux-4.1.39.orig/arch/sh/kernel/irq.c 2017-03-13 21:04:36.000000000 +0100
  2692. +++ linux-4.1.39/arch/sh/kernel/irq.c 2017-04-18 17:56:30.561395114 +0200
  2693. @@ -147,6 +147,7 @@
  2694. hardirq_ctx[cpu] = NULL;
  2695. }
  2696. +#ifndef CONFIG_PREEMPT_RT_FULL
  2697. void do_softirq_own_stack(void)
  2698. {
  2699. struct thread_info *curctx;
  2700. @@ -174,6 +175,7 @@
  2701. "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
  2702. );
  2703. }
  2704. +#endif
  2705. #else
  2706. static inline void handle_one_irq(unsigned int irq)
  2707. {
  2708. diff -Nur linux-4.1.39.orig/arch/sh/mm/fault.c linux-4.1.39/arch/sh/mm/fault.c
  2709. --- linux-4.1.39.orig/arch/sh/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2710. +++ linux-4.1.39/arch/sh/mm/fault.c 2017-04-18 17:56:30.561395114 +0200
  2711. @@ -17,6 +17,7 @@
  2712. #include <linux/kprobes.h>
  2713. #include <linux/perf_event.h>
  2714. #include <linux/kdebug.h>
  2715. +#include <linux/uaccess.h>
  2716. #include <asm/io_trapped.h>
  2717. #include <asm/mmu_context.h>
  2718. #include <asm/tlbflush.h>
  2719. @@ -438,9 +439,9 @@
  2720. /*
  2721. * If we're in an interrupt, have no user context or are running
  2722. - * in an atomic region then we must not take the fault:
  2723. + * with pagefaults disabled then we must not take the fault:
  2724. */
  2725. - if (unlikely(in_atomic() || !mm)) {
  2726. + if (unlikely(faulthandler_disabled() || !mm)) {
  2727. bad_area_nosemaphore(regs, error_code, address);
  2728. return;
  2729. }
  2730. diff -Nur linux-4.1.39.orig/arch/sparc/Kconfig linux-4.1.39/arch/sparc/Kconfig
  2731. --- linux-4.1.39.orig/arch/sparc/Kconfig 2017-03-13 21:04:36.000000000 +0100
  2732. +++ linux-4.1.39/arch/sparc/Kconfig 2017-04-18 17:56:30.561395114 +0200
  2733. @@ -189,12 +189,10 @@
  2734. source kernel/Kconfig.hz
  2735. config RWSEM_GENERIC_SPINLOCK
  2736. - bool
  2737. - default y if SPARC32
  2738. + def_bool PREEMPT_RT_FULL
  2739. config RWSEM_XCHGADD_ALGORITHM
  2740. - bool
  2741. - default y if SPARC64
  2742. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  2743. config GENERIC_HWEIGHT
  2744. bool
  2745. diff -Nur linux-4.1.39.orig/arch/sparc/kernel/irq_64.c linux-4.1.39/arch/sparc/kernel/irq_64.c
  2746. --- linux-4.1.39.orig/arch/sparc/kernel/irq_64.c 2017-03-13 21:04:36.000000000 +0100
  2747. +++ linux-4.1.39/arch/sparc/kernel/irq_64.c 2017-04-18 17:56:30.561395114 +0200
  2748. @@ -849,6 +849,7 @@
  2749. set_irq_regs(old_regs);
  2750. }
  2751. +#ifndef CONFIG_PREEMPT_RT_FULL
  2752. void do_softirq_own_stack(void)
  2753. {
  2754. void *orig_sp, *sp = softirq_stack[smp_processor_id()];
  2755. @@ -863,6 +864,7 @@
  2756. __asm__ __volatile__("mov %0, %%sp"
  2757. : : "r" (orig_sp));
  2758. }
  2759. +#endif
  2760. #ifdef CONFIG_HOTPLUG_CPU
  2761. void fixup_irqs(void)
  2762. diff -Nur linux-4.1.39.orig/arch/sparc/mm/fault_32.c linux-4.1.39/arch/sparc/mm/fault_32.c
  2763. --- linux-4.1.39.orig/arch/sparc/mm/fault_32.c 2017-03-13 21:04:36.000000000 +0100
  2764. +++ linux-4.1.39/arch/sparc/mm/fault_32.c 2017-04-18 17:56:30.561395114 +0200
  2765. @@ -21,6 +21,7 @@
  2766. #include <linux/perf_event.h>
  2767. #include <linux/interrupt.h>
  2768. #include <linux/kdebug.h>
  2769. +#include <linux/uaccess.h>
  2770. #include <asm/page.h>
  2771. #include <asm/pgtable.h>
  2772. @@ -29,7 +30,6 @@
  2773. #include <asm/setup.h>
  2774. #include <asm/smp.h>
  2775. #include <asm/traps.h>
  2776. -#include <asm/uaccess.h>
  2777. #include "mm_32.h"
  2778. @@ -196,7 +196,7 @@
  2779. * If we're in an interrupt or have no user
  2780. * context, we must not take the fault..
  2781. */
  2782. - if (in_atomic() || !mm)
  2783. + if (pagefault_disabled() || !mm)
  2784. goto no_context;
  2785. perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
  2786. diff -Nur linux-4.1.39.orig/arch/sparc/mm/fault_64.c linux-4.1.39/arch/sparc/mm/fault_64.c
  2787. --- linux-4.1.39.orig/arch/sparc/mm/fault_64.c 2017-03-13 21:04:36.000000000 +0100
  2788. +++ linux-4.1.39/arch/sparc/mm/fault_64.c 2017-04-18 17:56:30.561395114 +0200
  2789. @@ -22,12 +22,12 @@
  2790. #include <linux/kdebug.h>
  2791. #include <linux/percpu.h>
  2792. #include <linux/context_tracking.h>
  2793. +#include <linux/uaccess.h>
  2794. #include <asm/page.h>
  2795. #include <asm/pgtable.h>
  2796. #include <asm/openprom.h>
  2797. #include <asm/oplib.h>
  2798. -#include <asm/uaccess.h>
  2799. #include <asm/asi.h>
  2800. #include <asm/lsu.h>
  2801. #include <asm/sections.h>
  2802. @@ -330,7 +330,7 @@
  2803. * If we're in an interrupt or have no user
  2804. * context, we must not take the fault..
  2805. */
  2806. - if (in_atomic() || !mm)
  2807. + if (faulthandler_disabled() || !mm)
  2808. goto intr_or_no_mm;
  2809. perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
  2810. diff -Nur linux-4.1.39.orig/arch/sparc/mm/highmem.c linux-4.1.39/arch/sparc/mm/highmem.c
  2811. --- linux-4.1.39.orig/arch/sparc/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  2812. +++ linux-4.1.39/arch/sparc/mm/highmem.c 2017-04-18 17:56:30.561395114 +0200
  2813. @@ -53,7 +53,7 @@
  2814. unsigned long vaddr;
  2815. long idx, type;
  2816. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  2817. + preempt_disable();
  2818. pagefault_disable();
  2819. if (!PageHighMem(page))
  2820. return page_address(page);
  2821. @@ -91,6 +91,7 @@
  2822. if (vaddr < FIXADDR_START) { // FIXME
  2823. pagefault_enable();
  2824. + preempt_enable();
  2825. return;
  2826. }
  2827. @@ -126,5 +127,6 @@
  2828. kmap_atomic_idx_pop();
  2829. pagefault_enable();
  2830. + preempt_enable();
  2831. }
  2832. EXPORT_SYMBOL(__kunmap_atomic);
  2833. diff -Nur linux-4.1.39.orig/arch/sparc/mm/init_64.c linux-4.1.39/arch/sparc/mm/init_64.c
  2834. --- linux-4.1.39.orig/arch/sparc/mm/init_64.c 2017-03-13 21:04:36.000000000 +0100
  2835. +++ linux-4.1.39/arch/sparc/mm/init_64.c 2017-04-18 17:56:30.565395270 +0200
  2836. @@ -2738,7 +2738,7 @@
  2837. struct mm_struct *mm = current->mm;
  2838. struct tsb_config *tp;
  2839. - if (in_atomic() || !mm) {
  2840. + if (faulthandler_disabled() || !mm) {
  2841. const struct exception_table_entry *entry;
  2842. entry = search_exception_tables(regs->tpc);
  2843. diff -Nur linux-4.1.39.orig/arch/tile/include/asm/uaccess.h linux-4.1.39/arch/tile/include/asm/uaccess.h
  2844. --- linux-4.1.39.orig/arch/tile/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  2845. +++ linux-4.1.39/arch/tile/include/asm/uaccess.h 2017-04-18 17:56:30.565395270 +0200
  2846. @@ -78,7 +78,8 @@
  2847. * @addr: User space pointer to start of block to check
  2848. * @size: Size of block to check
  2849. *
  2850. - * Context: User context only. This function may sleep.
  2851. + * Context: User context only. This function may sleep if pagefaults are
  2852. + * enabled.
  2853. *
  2854. * Checks if a pointer to a block of memory in user space is valid.
  2855. *
  2856. @@ -192,7 +193,8 @@
  2857. * @x: Variable to store result.
  2858. * @ptr: Source address, in user space.
  2859. *
  2860. - * Context: User context only. This function may sleep.
  2861. + * Context: User context only. This function may sleep if pagefaults are
  2862. + * enabled.
  2863. *
  2864. * This macro copies a single simple variable from user space to kernel
  2865. * space. It supports simple types like char and int, but not larger
  2866. @@ -274,7 +276,8 @@
  2867. * @x: Value to copy to user space.
  2868. * @ptr: Destination address, in user space.
  2869. *
  2870. - * Context: User context only. This function may sleep.
  2871. + * Context: User context only. This function may sleep if pagefaults are
  2872. + * enabled.
  2873. *
  2874. * This macro copies a single simple value from kernel space to user
  2875. * space. It supports simple types like char and int, but not larger
  2876. @@ -330,7 +333,8 @@
  2877. * @from: Source address, in kernel space.
  2878. * @n: Number of bytes to copy.
  2879. *
  2880. - * Context: User context only. This function may sleep.
  2881. + * Context: User context only. This function may sleep if pagefaults are
  2882. + * enabled.
  2883. *
  2884. * Copy data from kernel space to user space. Caller must check
  2885. * the specified block with access_ok() before calling this function.
  2886. @@ -366,7 +370,8 @@
  2887. * @from: Source address, in user space.
  2888. * @n: Number of bytes to copy.
  2889. *
  2890. - * Context: User context only. This function may sleep.
  2891. + * Context: User context only. This function may sleep if pagefaults are
  2892. + * enabled.
  2893. *
  2894. * Copy data from user space to kernel space. Caller must check
  2895. * the specified block with access_ok() before calling this function.
  2896. @@ -437,7 +442,8 @@
  2897. * @from: Source address, in user space.
  2898. * @n: Number of bytes to copy.
  2899. *
  2900. - * Context: User context only. This function may sleep.
  2901. + * Context: User context only. This function may sleep if pagefaults are
  2902. + * enabled.
  2903. *
  2904. * Copy data from user space to user space. Caller must check
  2905. * the specified blocks with access_ok() before calling this function.
  2906. diff -Nur linux-4.1.39.orig/arch/tile/mm/fault.c linux-4.1.39/arch/tile/mm/fault.c
  2907. --- linux-4.1.39.orig/arch/tile/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2908. +++ linux-4.1.39/arch/tile/mm/fault.c 2017-04-18 17:56:30.565395270 +0200
  2909. @@ -354,9 +354,9 @@
  2910. /*
  2911. * If we're in an interrupt, have no user context or are running in an
  2912. - * atomic region then we must not take the fault.
  2913. + * region with pagefaults disabled then we must not take the fault.
  2914. */
  2915. - if (in_atomic() || !mm) {
  2916. + if (pagefault_disabled() || !mm) {
  2917. vma = NULL; /* happy compiler */
  2918. goto bad_area_nosemaphore;
  2919. }
  2920. diff -Nur linux-4.1.39.orig/arch/tile/mm/highmem.c linux-4.1.39/arch/tile/mm/highmem.c
  2921. --- linux-4.1.39.orig/arch/tile/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  2922. +++ linux-4.1.39/arch/tile/mm/highmem.c 2017-04-18 17:56:30.565395270 +0200
  2923. @@ -201,7 +201,7 @@
  2924. int idx, type;
  2925. pte_t *pte;
  2926. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  2927. + preempt_disable();
  2928. pagefault_disable();
  2929. /* Avoid icache flushes by disallowing atomic executable mappings. */
  2930. @@ -259,6 +259,7 @@
  2931. }
  2932. pagefault_enable();
  2933. + preempt_enable();
  2934. }
  2935. EXPORT_SYMBOL(__kunmap_atomic);
  2936. diff -Nur linux-4.1.39.orig/arch/um/kernel/trap.c linux-4.1.39/arch/um/kernel/trap.c
  2937. --- linux-4.1.39.orig/arch/um/kernel/trap.c 2017-03-13 21:04:36.000000000 +0100
  2938. +++ linux-4.1.39/arch/um/kernel/trap.c 2017-04-18 17:56:30.565395270 +0200
  2939. @@ -35,10 +35,10 @@
  2940. *code_out = SEGV_MAPERR;
  2941. /*
  2942. - * If the fault was during atomic operation, don't take the fault, just
  2943. + * If the fault was with pagefaults disabled, don't take the fault, just
  2944. * fail.
  2945. */
  2946. - if (in_atomic())
  2947. + if (faulthandler_disabled())
  2948. goto out_nosemaphore;
  2949. if (is_user)
  2950. diff -Nur linux-4.1.39.orig/arch/unicore32/mm/fault.c linux-4.1.39/arch/unicore32/mm/fault.c
  2951. --- linux-4.1.39.orig/arch/unicore32/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  2952. +++ linux-4.1.39/arch/unicore32/mm/fault.c 2017-04-18 17:56:30.565395270 +0200
  2953. @@ -218,7 +218,7 @@
  2954. * If we're in an interrupt or have no user
  2955. * context, we must not take the fault..
  2956. */
  2957. - if (in_atomic() || !mm)
  2958. + if (faulthandler_disabled() || !mm)
  2959. goto no_context;
  2960. if (user_mode(regs))
  2961. diff -Nur linux-4.1.39.orig/arch/x86/crypto/aesni-intel_glue.c linux-4.1.39/arch/x86/crypto/aesni-intel_glue.c
  2962. --- linux-4.1.39.orig/arch/x86/crypto/aesni-intel_glue.c 2017-03-13 21:04:36.000000000 +0100
  2963. +++ linux-4.1.39/arch/x86/crypto/aesni-intel_glue.c 2017-04-18 17:56:30.565395270 +0200
  2964. @@ -382,14 +382,14 @@
  2965. err = blkcipher_walk_virt(desc, &walk);
  2966. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  2967. - kernel_fpu_begin();
  2968. while ((nbytes = walk.nbytes)) {
  2969. + kernel_fpu_begin();
  2970. aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  2971. - nbytes & AES_BLOCK_MASK);
  2972. + nbytes & AES_BLOCK_MASK);
  2973. + kernel_fpu_end();
  2974. nbytes &= AES_BLOCK_SIZE - 1;
  2975. err = blkcipher_walk_done(desc, &walk, nbytes);
  2976. }
  2977. - kernel_fpu_end();
  2978. return err;
  2979. }
  2980. @@ -406,14 +406,14 @@
  2981. err = blkcipher_walk_virt(desc, &walk);
  2982. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  2983. - kernel_fpu_begin();
  2984. while ((nbytes = walk.nbytes)) {
  2985. + kernel_fpu_begin();
  2986. aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  2987. nbytes & AES_BLOCK_MASK);
  2988. + kernel_fpu_end();
  2989. nbytes &= AES_BLOCK_SIZE - 1;
  2990. err = blkcipher_walk_done(desc, &walk, nbytes);
  2991. }
  2992. - kernel_fpu_end();
  2993. return err;
  2994. }
  2995. @@ -430,14 +430,14 @@
  2996. err = blkcipher_walk_virt(desc, &walk);
  2997. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  2998. - kernel_fpu_begin();
  2999. while ((nbytes = walk.nbytes)) {
  3000. + kernel_fpu_begin();
  3001. aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3002. nbytes & AES_BLOCK_MASK, walk.iv);
  3003. + kernel_fpu_end();
  3004. nbytes &= AES_BLOCK_SIZE - 1;
  3005. err = blkcipher_walk_done(desc, &walk, nbytes);
  3006. }
  3007. - kernel_fpu_end();
  3008. return err;
  3009. }
  3010. @@ -454,14 +454,14 @@
  3011. err = blkcipher_walk_virt(desc, &walk);
  3012. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3013. - kernel_fpu_begin();
  3014. while ((nbytes = walk.nbytes)) {
  3015. + kernel_fpu_begin();
  3016. aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3017. nbytes & AES_BLOCK_MASK, walk.iv);
  3018. + kernel_fpu_end();
  3019. nbytes &= AES_BLOCK_SIZE - 1;
  3020. err = blkcipher_walk_done(desc, &walk, nbytes);
  3021. }
  3022. - kernel_fpu_end();
  3023. return err;
  3024. }
  3025. @@ -513,18 +513,20 @@
  3026. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  3027. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3028. - kernel_fpu_begin();
  3029. while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
  3030. + kernel_fpu_begin();
  3031. aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
  3032. nbytes & AES_BLOCK_MASK, walk.iv);
  3033. + kernel_fpu_end();
  3034. nbytes &= AES_BLOCK_SIZE - 1;
  3035. err = blkcipher_walk_done(desc, &walk, nbytes);
  3036. }
  3037. if (walk.nbytes) {
  3038. + kernel_fpu_begin();
  3039. ctr_crypt_final(ctx, &walk);
  3040. + kernel_fpu_end();
  3041. err = blkcipher_walk_done(desc, &walk, 0);
  3042. }
  3043. - kernel_fpu_end();
  3044. return err;
  3045. }
  3046. diff -Nur linux-4.1.39.orig/arch/x86/crypto/cast5_avx_glue.c linux-4.1.39/arch/x86/crypto/cast5_avx_glue.c
  3047. --- linux-4.1.39.orig/arch/x86/crypto/cast5_avx_glue.c 2017-03-13 21:04:36.000000000 +0100
  3048. +++ linux-4.1.39/arch/x86/crypto/cast5_avx_glue.c 2017-04-18 17:56:30.565395270 +0200
  3049. @@ -60,7 +60,7 @@
  3050. static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
  3051. bool enc)
  3052. {
  3053. - bool fpu_enabled = false;
  3054. + bool fpu_enabled;
  3055. struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  3056. const unsigned int bsize = CAST5_BLOCK_SIZE;
  3057. unsigned int nbytes;
  3058. @@ -76,7 +76,7 @@
  3059. u8 *wsrc = walk->src.virt.addr;
  3060. u8 *wdst = walk->dst.virt.addr;
  3061. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  3062. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  3063. /* Process multi-block batch */
  3064. if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
  3065. @@ -104,10 +104,9 @@
  3066. } while (nbytes >= bsize);
  3067. done:
  3068. + cast5_fpu_end(fpu_enabled);
  3069. err = blkcipher_walk_done(desc, walk, nbytes);
  3070. }
  3071. -
  3072. - cast5_fpu_end(fpu_enabled);
  3073. return err;
  3074. }
  3075. @@ -228,7 +227,7 @@
  3076. static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  3077. struct scatterlist *src, unsigned int nbytes)
  3078. {
  3079. - bool fpu_enabled = false;
  3080. + bool fpu_enabled;
  3081. struct blkcipher_walk walk;
  3082. int err;
  3083. @@ -237,12 +236,11 @@
  3084. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3085. while ((nbytes = walk.nbytes)) {
  3086. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  3087. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  3088. nbytes = __cbc_decrypt(desc, &walk);
  3089. + cast5_fpu_end(fpu_enabled);
  3090. err = blkcipher_walk_done(desc, &walk, nbytes);
  3091. }
  3092. -
  3093. - cast5_fpu_end(fpu_enabled);
  3094. return err;
  3095. }
  3096. @@ -312,7 +310,7 @@
  3097. static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  3098. struct scatterlist *src, unsigned int nbytes)
  3099. {
  3100. - bool fpu_enabled = false;
  3101. + bool fpu_enabled;
  3102. struct blkcipher_walk walk;
  3103. int err;
  3104. @@ -321,13 +319,12 @@
  3105. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  3106. while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
  3107. - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
  3108. + fpu_enabled = cast5_fpu_begin(false, nbytes);
  3109. nbytes = __ctr_crypt(desc, &walk);
  3110. + cast5_fpu_end(fpu_enabled);
  3111. err = blkcipher_walk_done(desc, &walk, nbytes);
  3112. }
  3113. - cast5_fpu_end(fpu_enabled);
  3114. -
  3115. if (walk.nbytes) {
  3116. ctr_crypt_final(desc, &walk);
  3117. err = blkcipher_walk_done(desc, &walk, 0);
  3118. diff -Nur linux-4.1.39.orig/arch/x86/crypto/glue_helper.c linux-4.1.39/arch/x86/crypto/glue_helper.c
  3119. --- linux-4.1.39.orig/arch/x86/crypto/glue_helper.c 2017-03-13 21:04:36.000000000 +0100
  3120. +++ linux-4.1.39/arch/x86/crypto/glue_helper.c 2017-04-18 17:56:30.565395270 +0200
  3121. @@ -39,7 +39,7 @@
  3122. void *ctx = crypto_blkcipher_ctx(desc->tfm);
  3123. const unsigned int bsize = 128 / 8;
  3124. unsigned int nbytes, i, func_bytes;
  3125. - bool fpu_enabled = false;
  3126. + bool fpu_enabled;
  3127. int err;
  3128. err = blkcipher_walk_virt(desc, walk);
  3129. @@ -49,7 +49,7 @@
  3130. u8 *wdst = walk->dst.virt.addr;
  3131. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3132. - desc, fpu_enabled, nbytes);
  3133. + desc, false, nbytes);
  3134. for (i = 0; i < gctx->num_funcs; i++) {
  3135. func_bytes = bsize * gctx->funcs[i].num_blocks;
  3136. @@ -71,10 +71,10 @@
  3137. }
  3138. done:
  3139. + glue_fpu_end(fpu_enabled);
  3140. err = blkcipher_walk_done(desc, walk, nbytes);
  3141. }
  3142. - glue_fpu_end(fpu_enabled);
  3143. return err;
  3144. }
  3145. @@ -194,7 +194,7 @@
  3146. struct scatterlist *src, unsigned int nbytes)
  3147. {
  3148. const unsigned int bsize = 128 / 8;
  3149. - bool fpu_enabled = false;
  3150. + bool fpu_enabled;
  3151. struct blkcipher_walk walk;
  3152. int err;
  3153. @@ -203,12 +203,12 @@
  3154. while ((nbytes = walk.nbytes)) {
  3155. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3156. - desc, fpu_enabled, nbytes);
  3157. + desc, false, nbytes);
  3158. nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
  3159. + glue_fpu_end(fpu_enabled);
  3160. err = blkcipher_walk_done(desc, &walk, nbytes);
  3161. }
  3162. - glue_fpu_end(fpu_enabled);
  3163. return err;
  3164. }
  3165. EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
  3166. @@ -277,7 +277,7 @@
  3167. struct scatterlist *src, unsigned int nbytes)
  3168. {
  3169. const unsigned int bsize = 128 / 8;
  3170. - bool fpu_enabled = false;
  3171. + bool fpu_enabled;
  3172. struct blkcipher_walk walk;
  3173. int err;
  3174. @@ -286,13 +286,12 @@
  3175. while ((nbytes = walk.nbytes) >= bsize) {
  3176. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3177. - desc, fpu_enabled, nbytes);
  3178. + desc, false, nbytes);
  3179. nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
  3180. + glue_fpu_end(fpu_enabled);
  3181. err = blkcipher_walk_done(desc, &walk, nbytes);
  3182. }
  3183. - glue_fpu_end(fpu_enabled);
  3184. -
  3185. if (walk.nbytes) {
  3186. glue_ctr_crypt_final_128bit(
  3187. gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
  3188. @@ -347,7 +346,7 @@
  3189. void *tweak_ctx, void *crypt_ctx)
  3190. {
  3191. const unsigned int bsize = 128 / 8;
  3192. - bool fpu_enabled = false;
  3193. + bool fpu_enabled;
  3194. struct blkcipher_walk walk;
  3195. int err;
  3196. @@ -360,21 +359,21 @@
  3197. /* set minimum length to bsize, for tweak_fn */
  3198. fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3199. - desc, fpu_enabled,
  3200. + desc, false,
  3201. nbytes < bsize ? bsize : nbytes);
  3202. -
  3203. /* calculate first value of T */
  3204. tweak_fn(tweak_ctx, walk.iv, walk.iv);
  3205. + glue_fpu_end(fpu_enabled);
  3206. while (nbytes) {
  3207. + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
  3208. + desc, false, nbytes);
  3209. nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
  3210. + glue_fpu_end(fpu_enabled);
  3211. err = blkcipher_walk_done(desc, &walk, nbytes);
  3212. nbytes = walk.nbytes;
  3213. }
  3214. -
  3215. - glue_fpu_end(fpu_enabled);
  3216. -
  3217. return err;
  3218. }
  3219. EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
  3220. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/preempt.h linux-4.1.39/arch/x86/include/asm/preempt.h
  3221. --- linux-4.1.39.orig/arch/x86/include/asm/preempt.h 2017-03-13 21:04:36.000000000 +0100
  3222. +++ linux-4.1.39/arch/x86/include/asm/preempt.h 2017-04-18 17:56:30.565395270 +0200
  3223. @@ -82,17 +82,46 @@
  3224. * a decrement which hits zero means we have no preempt_count and should
  3225. * reschedule.
  3226. */
  3227. -static __always_inline bool __preempt_count_dec_and_test(void)
  3228. +static __always_inline bool ____preempt_count_dec_and_test(void)
  3229. {
  3230. GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
  3231. }
  3232. +static __always_inline bool __preempt_count_dec_and_test(void)
  3233. +{
  3234. + if (____preempt_count_dec_and_test())
  3235. + return true;
  3236. +#ifdef CONFIG_PREEMPT_LAZY
  3237. + if (current_thread_info()->preempt_lazy_count)
  3238. + return false;
  3239. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  3240. +#else
  3241. + return false;
  3242. +#endif
  3243. +}
  3244. +
  3245. /*
  3246. * Returns true when we need to resched and can (barring IRQ state).
  3247. */
  3248. static __always_inline bool should_resched(int preempt_offset)
  3249. {
  3250. +#ifdef CONFIG_PREEMPT_LAZY
  3251. + u32 tmp;
  3252. +
  3253. + tmp = raw_cpu_read_4(__preempt_count);
  3254. + if (tmp == preempt_offset)
  3255. + return true;
  3256. +
  3257. + /* preempt count == 0 ? */
  3258. + tmp &= ~PREEMPT_NEED_RESCHED;
  3259. + if (tmp)
  3260. + return false;
  3261. + if (current_thread_info()->preempt_lazy_count)
  3262. + return false;
  3263. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  3264. +#else
  3265. return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
  3266. +#endif
  3267. }
  3268. #ifdef CONFIG_PREEMPT
  3269. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/signal.h linux-4.1.39/arch/x86/include/asm/signal.h
  3270. --- linux-4.1.39.orig/arch/x86/include/asm/signal.h 2017-03-13 21:04:36.000000000 +0100
  3271. +++ linux-4.1.39/arch/x86/include/asm/signal.h 2017-04-18 17:56:30.565395270 +0200
  3272. @@ -23,6 +23,19 @@
  3273. unsigned long sig[_NSIG_WORDS];
  3274. } sigset_t;
  3275. +/*
  3276. + * Because some traps use the IST stack, we must keep preemption
  3277. + * disabled while calling do_trap(), but do_trap() may call
  3278. + * force_sig_info() which will grab the signal spin_locks for the
  3279. + * task, which in PREEMPT_RT_FULL are mutexes. By defining
  3280. + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
  3281. + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
  3282. + * trap.
  3283. + */
  3284. +#if defined(CONFIG_PREEMPT_RT_FULL)
  3285. +#define ARCH_RT_DELAYS_SIGNAL_SEND
  3286. +#endif
  3287. +
  3288. #ifndef CONFIG_COMPAT
  3289. typedef sigset_t compat_sigset_t;
  3290. #endif
  3291. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/stackprotector.h linux-4.1.39/arch/x86/include/asm/stackprotector.h
  3292. --- linux-4.1.39.orig/arch/x86/include/asm/stackprotector.h 2017-03-13 21:04:36.000000000 +0100
  3293. +++ linux-4.1.39/arch/x86/include/asm/stackprotector.h 2017-04-18 17:56:30.565395270 +0200
  3294. @@ -57,7 +57,7 @@
  3295. */
  3296. static __always_inline void boot_init_stack_canary(void)
  3297. {
  3298. - u64 canary;
  3299. + u64 uninitialized_var(canary);
  3300. u64 tsc;
  3301. #ifdef CONFIG_X86_64
  3302. @@ -68,8 +68,16 @@
  3303. * of randomness. The TSC only matters for very early init,
  3304. * there it already has some randomness on most systems. Later
  3305. * on during the bootup the random pool has true entropy too.
  3306. + *
  3307. + * For preempt-rt we need to weaken the randomness a bit, as
  3308. + * we can't call into the random generator from atomic context
  3309. + * due to locking constraints. We just leave canary
  3310. + * uninitialized and use the TSC based randomness on top of
  3311. + * it.
  3312. */
  3313. +#ifndef CONFIG_PREEMPT_RT_FULL
  3314. get_random_bytes(&canary, sizeof(canary));
  3315. +#endif
  3316. tsc = __native_read_tsc();
  3317. canary += tsc + (tsc << 32UL);
  3318. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/thread_info.h linux-4.1.39/arch/x86/include/asm/thread_info.h
  3319. --- linux-4.1.39.orig/arch/x86/include/asm/thread_info.h 2017-03-13 21:04:36.000000000 +0100
  3320. +++ linux-4.1.39/arch/x86/include/asm/thread_info.h 2017-04-18 17:56:30.565395270 +0200
  3321. @@ -55,6 +55,8 @@
  3322. __u32 status; /* thread synchronous flags */
  3323. __u32 cpu; /* current CPU */
  3324. int saved_preempt_count;
  3325. + int preempt_lazy_count; /* 0 => lazy preemptable
  3326. + <0 => BUG */
  3327. mm_segment_t addr_limit;
  3328. void __user *sysenter_return;
  3329. unsigned int sig_on_uaccess_error:1;
  3330. @@ -95,6 +97,7 @@
  3331. #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
  3332. #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
  3333. #define TIF_SECCOMP 8 /* secure computing */
  3334. +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
  3335. #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
  3336. #define TIF_UPROBE 12 /* breakpointed or singlestepping */
  3337. #define TIF_NOTSC 16 /* TSC is not accessible in userland */
  3338. @@ -119,6 +122,7 @@
  3339. #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
  3340. #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
  3341. #define _TIF_SECCOMP (1 << TIF_SECCOMP)
  3342. +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
  3343. #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
  3344. #define _TIF_UPROBE (1 << TIF_UPROBE)
  3345. #define _TIF_NOTSC (1 << TIF_NOTSC)
  3346. @@ -168,6 +172,8 @@
  3347. #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
  3348. #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
  3349. +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
  3350. +
  3351. #define STACK_WARN (THREAD_SIZE/8)
  3352. /*
  3353. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/uaccess_32.h linux-4.1.39/arch/x86/include/asm/uaccess_32.h
  3354. --- linux-4.1.39.orig/arch/x86/include/asm/uaccess_32.h 2017-03-13 21:04:36.000000000 +0100
  3355. +++ linux-4.1.39/arch/x86/include/asm/uaccess_32.h 2017-04-18 17:56:30.565395270 +0200
  3356. @@ -70,7 +70,8 @@
  3357. * @from: Source address, in kernel space.
  3358. * @n: Number of bytes to copy.
  3359. *
  3360. - * Context: User context only. This function may sleep.
  3361. + * Context: User context only. This function may sleep if pagefaults are
  3362. + * enabled.
  3363. *
  3364. * Copy data from kernel space to user space. Caller must check
  3365. * the specified block with access_ok() before calling this function.
  3366. @@ -117,7 +118,8 @@
  3367. * @from: Source address, in user space.
  3368. * @n: Number of bytes to copy.
  3369. *
  3370. - * Context: User context only. This function may sleep.
  3371. + * Context: User context only. This function may sleep if pagefaults are
  3372. + * enabled.
  3373. *
  3374. * Copy data from user space to kernel space. Caller must check
  3375. * the specified block with access_ok() before calling this function.
  3376. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/uaccess.h linux-4.1.39/arch/x86/include/asm/uaccess.h
  3377. --- linux-4.1.39.orig/arch/x86/include/asm/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  3378. +++ linux-4.1.39/arch/x86/include/asm/uaccess.h 2017-04-18 17:56:30.565395270 +0200
  3379. @@ -74,7 +74,8 @@
  3380. * @addr: User space pointer to start of block to check
  3381. * @size: Size of block to check
  3382. *
  3383. - * Context: User context only. This function may sleep.
  3384. + * Context: User context only. This function may sleep if pagefaults are
  3385. + * enabled.
  3386. *
  3387. * Checks if a pointer to a block of memory in user space is valid.
  3388. *
  3389. @@ -145,7 +146,8 @@
  3390. * @x: Variable to store result.
  3391. * @ptr: Source address, in user space.
  3392. *
  3393. - * Context: User context only. This function may sleep.
  3394. + * Context: User context only. This function may sleep if pagefaults are
  3395. + * enabled.
  3396. *
  3397. * This macro copies a single simple variable from user space to kernel
  3398. * space. It supports simple types like char and int, but not larger
  3399. @@ -240,7 +242,8 @@
  3400. * @x: Value to copy to user space.
  3401. * @ptr: Destination address, in user space.
  3402. *
  3403. - * Context: User context only. This function may sleep.
  3404. + * Context: User context only. This function may sleep if pagefaults are
  3405. + * enabled.
  3406. *
  3407. * This macro copies a single simple value from kernel space to user
  3408. * space. It supports simple types like char and int, but not larger
  3409. @@ -459,7 +462,8 @@
  3410. * @x: Variable to store result.
  3411. * @ptr: Source address, in user space.
  3412. *
  3413. - * Context: User context only. This function may sleep.
  3414. + * Context: User context only. This function may sleep if pagefaults are
  3415. + * enabled.
  3416. *
  3417. * This macro copies a single simple variable from user space to kernel
  3418. * space. It supports simple types like char and int, but not larger
  3419. @@ -483,7 +487,8 @@
  3420. * @x: Value to copy to user space.
  3421. * @ptr: Destination address, in user space.
  3422. *
  3423. - * Context: User context only. This function may sleep.
  3424. + * Context: User context only. This function may sleep if pagefaults are
  3425. + * enabled.
  3426. *
  3427. * This macro copies a single simple value from kernel space to user
  3428. * space. It supports simple types like char and int, but not larger
  3429. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/uv/uv_bau.h linux-4.1.39/arch/x86/include/asm/uv/uv_bau.h
  3430. --- linux-4.1.39.orig/arch/x86/include/asm/uv/uv_bau.h 2017-03-13 21:04:36.000000000 +0100
  3431. +++ linux-4.1.39/arch/x86/include/asm/uv/uv_bau.h 2017-04-18 17:56:30.565395270 +0200
  3432. @@ -615,9 +615,9 @@
  3433. cycles_t send_message;
  3434. cycles_t period_end;
  3435. cycles_t period_time;
  3436. - spinlock_t uvhub_lock;
  3437. - spinlock_t queue_lock;
  3438. - spinlock_t disable_lock;
  3439. + raw_spinlock_t uvhub_lock;
  3440. + raw_spinlock_t queue_lock;
  3441. + raw_spinlock_t disable_lock;
  3442. /* tunables */
  3443. int max_concurr;
  3444. int max_concurr_const;
  3445. @@ -776,15 +776,15 @@
  3446. * to be lowered below the current 'v'. atomic_add_unless can only stop
  3447. * on equal.
  3448. */
  3449. -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
  3450. +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u)
  3451. {
  3452. - spin_lock(lock);
  3453. + raw_spin_lock(lock);
  3454. if (atomic_read(v) >= u) {
  3455. - spin_unlock(lock);
  3456. + raw_spin_unlock(lock);
  3457. return 0;
  3458. }
  3459. atomic_inc(v);
  3460. - spin_unlock(lock);
  3461. + raw_spin_unlock(lock);
  3462. return 1;
  3463. }
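
On PREEMPT_RT a spinlock_t is backed by a sleeping rtmutex, so locks that may be taken from contexts that cannot sleep (the BAU shootdown path runs with interrupts hard-disabled) are converted to raw_spinlock_t, which keeps the classic busy-wait behaviour. A minimal sketch of the resulting pattern, using a hypothetical structure rather than the BAU types:

#include <linux/spinlock.h>

struct bau_like_stats {
        raw_spinlock_t lock;    /* raw: usable from hard-irq context on RT */
        int active;
};

static int inc_if_below(struct bau_like_stats *s, int limit)
{
        int ok = 0;

        raw_spin_lock(&s->lock);        /* never sleeps, even on PREEMPT_RT */
        if (s->active < limit) {
                s->active++;
                ok = 1;
        }
        raw_spin_unlock(&s->lock);
        return ok;
}
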
  3464. diff -Nur linux-4.1.39.orig/arch/x86/include/asm/uv/uv_hub.h linux-4.1.39/arch/x86/include/asm/uv/uv_hub.h
  3465. --- linux-4.1.39.orig/arch/x86/include/asm/uv/uv_hub.h 2017-03-13 21:04:36.000000000 +0100
  3466. +++ linux-4.1.39/arch/x86/include/asm/uv/uv_hub.h 2017-04-18 17:56:30.565395270 +0200
  3467. @@ -492,7 +492,7 @@
  3468. unsigned short nr_online_cpus;
  3469. unsigned short pnode;
  3470. short memory_nid;
  3471. - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  3472. + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
  3473. unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
  3474. };
  3475. extern struct uv_blade_info *uv_blade_info;
  3476. diff -Nur linux-4.1.39.orig/arch/x86/Kconfig linux-4.1.39/arch/x86/Kconfig
  3477. --- linux-4.1.39.orig/arch/x86/Kconfig 2017-03-13 21:04:36.000000000 +0100
  3478. +++ linux-4.1.39/arch/x86/Kconfig 2017-04-18 17:56:30.565395270 +0200
  3479. @@ -22,6 +22,7 @@
  3480. ### Arch settings
  3481. config X86
  3482. def_bool y
  3483. + select HAVE_PREEMPT_LAZY
  3484. select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
  3485. select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
  3486. select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
  3487. @@ -203,8 +204,11 @@
  3488. def_bool y
  3489. depends on ISA_DMA_API
  3490. +config RWSEM_GENERIC_SPINLOCK
  3491. + def_bool PREEMPT_RT_FULL
  3492. +
  3493. config RWSEM_XCHGADD_ALGORITHM
  3494. - def_bool y
  3495. + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
  3496. config GENERIC_CALIBRATE_DELAY
  3497. def_bool y
  3498. @@ -838,7 +842,7 @@
  3499. config MAXSMP
  3500. bool "Enable Maximum number of SMP Processors and NUMA Nodes"
  3501. depends on X86_64 && SMP && DEBUG_KERNEL
  3502. - select CPUMASK_OFFSTACK
  3503. + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
  3504. ---help---
  3505. Enable maximum number of CPUS and NUMA Nodes for this architecture.
  3506. If unsure, say N.
  3507. diff -Nur linux-4.1.39.orig/arch/x86/kernel/acpi/boot.c linux-4.1.39/arch/x86/kernel/acpi/boot.c
  3508. --- linux-4.1.39.orig/arch/x86/kernel/acpi/boot.c 2017-03-13 21:04:36.000000000 +0100
  3509. +++ linux-4.1.39/arch/x86/kernel/acpi/boot.c 2017-04-18 17:56:30.565395270 +0200
  3510. @@ -87,7 +87,9 @@
  3511. * ->ioapic_mutex
  3512. * ->ioapic_lock
  3513. */
  3514. +#ifdef CONFIG_X86_IO_APIC
  3515. static DEFINE_MUTEX(acpi_ioapic_lock);
  3516. +#endif
  3517. /* --------------------------------------------------------------------------
  3518. Boot-time Configuration
  3519. diff -Nur linux-4.1.39.orig/arch/x86/kernel/apic/io_apic.c linux-4.1.39/arch/x86/kernel/apic/io_apic.c
  3520. --- linux-4.1.39.orig/arch/x86/kernel/apic/io_apic.c 2017-03-13 21:04:36.000000000 +0100
  3521. +++ linux-4.1.39/arch/x86/kernel/apic/io_apic.c 2017-04-18 17:56:30.565395270 +0200
  3522. @@ -1891,7 +1891,8 @@
  3523. static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
  3524. {
  3525. /* If we are moving the irq we need to mask it */
  3526. - if (unlikely(irqd_is_setaffinity_pending(data))) {
  3527. + if (unlikely(irqd_is_setaffinity_pending(data) &&
  3528. + !irqd_irq_inprogress(data))) {
  3529. mask_ioapic(cfg);
  3530. return true;
  3531. }
  3532. diff -Nur linux-4.1.39.orig/arch/x86/kernel/apic/x2apic_uv_x.c linux-4.1.39/arch/x86/kernel/apic/x2apic_uv_x.c
  3533. --- linux-4.1.39.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2017-03-13 21:04:36.000000000 +0100
  3534. +++ linux-4.1.39/arch/x86/kernel/apic/x2apic_uv_x.c 2017-04-18 17:56:30.569395424 +0200
  3535. @@ -949,7 +949,7 @@
  3536. uv_blade_info[blade].pnode = pnode;
  3537. uv_blade_info[blade].nr_possible_cpus = 0;
  3538. uv_blade_info[blade].nr_online_cpus = 0;
  3539. - spin_lock_init(&uv_blade_info[blade].nmi_lock);
  3540. + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock);
  3541. min_pnode = min(pnode, min_pnode);
  3542. max_pnode = max(pnode, max_pnode);
  3543. blade++;
  3544. diff -Nur linux-4.1.39.orig/arch/x86/kernel/asm-offsets.c linux-4.1.39/arch/x86/kernel/asm-offsets.c
  3545. --- linux-4.1.39.orig/arch/x86/kernel/asm-offsets.c 2017-03-13 21:04:36.000000000 +0100
  3546. +++ linux-4.1.39/arch/x86/kernel/asm-offsets.c 2017-04-18 17:56:30.569395424 +0200
  3547. @@ -32,6 +32,7 @@
  3548. OFFSET(TI_flags, thread_info, flags);
  3549. OFFSET(TI_status, thread_info, status);
  3550. OFFSET(TI_addr_limit, thread_info, addr_limit);
  3551. + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
  3552. BLANK();
  3553. OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
  3554. @@ -71,4 +72,5 @@
  3555. BLANK();
  3556. DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
  3557. + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED);
  3558. }
  3559. diff -Nur linux-4.1.39.orig/arch/x86/kernel/cpu/mcheck/mce.c linux-4.1.39/arch/x86/kernel/cpu/mcheck/mce.c
  3560. --- linux-4.1.39.orig/arch/x86/kernel/cpu/mcheck/mce.c 2017-03-13 21:04:36.000000000 +0100
  3561. +++ linux-4.1.39/arch/x86/kernel/cpu/mcheck/mce.c 2017-04-18 17:56:30.569395424 +0200
  3562. @@ -41,6 +41,8 @@
  3563. #include <linux/debugfs.h>
  3564. #include <linux/irq_work.h>
  3565. #include <linux/export.h>
  3566. +#include <linux/jiffies.h>
  3567. +#include <linux/work-simple.h>
  3568. #include <asm/processor.h>
  3569. #include <asm/traps.h>
  3570. @@ -1267,7 +1269,7 @@
  3571. static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
  3572. static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
  3573. -static DEFINE_PER_CPU(struct timer_list, mce_timer);
  3574. +static DEFINE_PER_CPU(struct hrtimer, mce_timer);
  3575. static unsigned long mce_adjust_timer_default(unsigned long interval)
  3576. {
  3577. @@ -1276,32 +1278,18 @@
  3578. static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
  3579. -static void __restart_timer(struct timer_list *t, unsigned long interval)
  3580. +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval)
  3581. {
  3582. - unsigned long when = jiffies + interval;
  3583. - unsigned long flags;
  3584. -
  3585. - local_irq_save(flags);
  3586. -
  3587. - if (timer_pending(t)) {
  3588. - if (time_before(when, t->expires))
  3589. - mod_timer_pinned(t, when);
  3590. - } else {
  3591. - t->expires = round_jiffies(when);
  3592. - add_timer_on(t, smp_processor_id());
  3593. - }
  3594. -
  3595. - local_irq_restore(flags);
  3596. + if (!interval)
  3597. + return HRTIMER_NORESTART;
  3598. + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval)));
  3599. + return HRTIMER_RESTART;
  3600. }
  3601. -static void mce_timer_fn(unsigned long data)
  3602. +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
  3603. {
  3604. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  3605. - int cpu = smp_processor_id();
  3606. unsigned long iv;
  3607. - WARN_ON(cpu != data);
  3608. -
  3609. iv = __this_cpu_read(mce_next_interval);
  3610. if (mce_available(this_cpu_ptr(&cpu_info))) {
  3611. @@ -1324,7 +1312,7 @@
  3612. done:
  3613. __this_cpu_write(mce_next_interval, iv);
  3614. - __restart_timer(t, iv);
  3615. + return __restart_timer(timer, iv);
  3616. }
  3617. /*
  3618. @@ -1332,7 +1320,7 @@
  3619. */
  3620. void mce_timer_kick(unsigned long interval)
  3621. {
  3622. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  3623. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  3624. unsigned long iv = __this_cpu_read(mce_next_interval);
  3625. __restart_timer(t, interval);
  3626. @@ -1347,7 +1335,7 @@
  3627. int cpu;
  3628. for_each_online_cpu(cpu)
  3629. - del_timer_sync(&per_cpu(mce_timer, cpu));
  3630. + hrtimer_cancel(&per_cpu(mce_timer, cpu));
  3631. }
  3632. static void mce_do_trigger(struct work_struct *work)
  3633. @@ -1357,6 +1345,56 @@
  3634. static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
  3635. +static void __mce_notify_work(struct swork_event *event)
  3636. +{
  3637. + /* Not more than two messages every minute */
  3638. + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  3639. +
  3640. + /* wake processes polling /dev/mcelog */
  3641. + wake_up_interruptible(&mce_chrdev_wait);
  3642. +
  3643. + /*
  3644. + * There is no risk of missing notifications because
  3645. + * work_pending is always cleared before the function is
  3646. + * executed.
  3647. + */
  3648. + if (mce_helper[0] && !work_pending(&mce_trigger_work))
  3649. + schedule_work(&mce_trigger_work);
  3650. +
  3651. + if (__ratelimit(&ratelimit))
  3652. + pr_info(HW_ERR "Machine check events logged\n");
  3653. +}
  3654. +
  3655. +#ifdef CONFIG_PREEMPT_RT_FULL
  3656. +static bool notify_work_ready __read_mostly;
  3657. +static struct swork_event notify_work;
  3658. +
  3659. +static int mce_notify_work_init(void)
  3660. +{
  3661. + int err;
  3662. +
  3663. + err = swork_get();
  3664. + if (err)
  3665. + return err;
  3666. +
  3667. + INIT_SWORK(&notify_work, __mce_notify_work);
  3668. + notify_work_ready = true;
  3669. + return 0;
  3670. +}
  3671. +
  3672. +static void mce_notify_work(void)
  3673. +{
  3674. + if (notify_work_ready)
  3675. + swork_queue(&notify_work);
  3676. +}
  3677. +#else
  3678. +static void mce_notify_work(void)
  3679. +{
  3680. + __mce_notify_work(NULL);
  3681. +}
  3682. +static inline int mce_notify_work_init(void) { return 0; }
  3683. +#endif
  3684. +
  3685. /*
  3686. * Notify the user(s) about new machine check events.
  3687. * Can be called from interrupt context, but not from machine check/NMI
  3688. @@ -1364,19 +1402,8 @@
  3689. */
  3690. int mce_notify_irq(void)
  3691. {
  3692. - /* Not more than two messages every minute */
  3693. - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
  3694. -
  3695. if (test_and_clear_bit(0, &mce_need_notify)) {
  3696. - /* wake processes polling /dev/mcelog */
  3697. - wake_up_interruptible(&mce_chrdev_wait);
  3698. -
  3699. - if (mce_helper[0])
  3700. - schedule_work(&mce_trigger_work);
  3701. -
  3702. - if (__ratelimit(&ratelimit))
  3703. - pr_info(HW_ERR "Machine check events logged\n");
  3704. -
  3705. + mce_notify_work();
  3706. return 1;
  3707. }
  3708. return 0;
  3709. @@ -1649,7 +1676,7 @@
  3710. }
  3711. }
  3712. -static void mce_start_timer(unsigned int cpu, struct timer_list *t)
  3713. +static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
  3714. {
  3715. unsigned long iv = check_interval * HZ;
  3716. @@ -1658,16 +1685,17 @@
  3717. per_cpu(mce_next_interval, cpu) = iv;
  3718. - t->expires = round_jiffies(jiffies + iv);
  3719. - add_timer_on(t, cpu);
  3720. + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
  3721. + 0, HRTIMER_MODE_REL_PINNED);
  3722. }
  3723. static void __mcheck_cpu_init_timer(void)
  3724. {
  3725. - struct timer_list *t = this_cpu_ptr(&mce_timer);
  3726. + struct hrtimer *t = this_cpu_ptr(&mce_timer);
  3727. unsigned int cpu = smp_processor_id();
  3728. - setup_timer(t, mce_timer_fn, cpu);
  3729. + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  3730. + t->function = mce_timer_fn;
  3731. mce_start_timer(cpu, t);
  3732. }
  3733. @@ -2345,6 +2373,8 @@
  3734. if (!mce_available(raw_cpu_ptr(&cpu_info)))
  3735. return;
  3736. + hrtimer_cancel(this_cpu_ptr(&mce_timer));
  3737. +
  3738. if (!(action & CPU_TASKS_FROZEN))
  3739. cmci_clear();
  3740. for (i = 0; i < mca_cfg.banks; i++) {
  3741. @@ -2371,6 +2401,7 @@
  3742. if (b->init)
  3743. wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
  3744. }
  3745. + __mcheck_cpu_init_timer();
  3746. }
  3747. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  3748. @@ -2378,7 +2409,6 @@
  3749. mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  3750. {
  3751. unsigned int cpu = (unsigned long)hcpu;
  3752. - struct timer_list *t = &per_cpu(mce_timer, cpu);
  3753. switch (action & ~CPU_TASKS_FROZEN) {
  3754. case CPU_ONLINE:
  3755. @@ -2398,11 +2428,9 @@
  3756. break;
  3757. case CPU_DOWN_PREPARE:
  3758. smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
  3759. - del_timer_sync(t);
  3760. break;
  3761. case CPU_DOWN_FAILED:
  3762. smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
  3763. - mce_start_timer(cpu, t);
  3764. break;
  3765. }
  3766. @@ -2441,6 +2469,10 @@
  3767. goto err_out;
  3768. }
  3769. + err = mce_notify_work_init();
  3770. + if (err)
  3771. + goto err_out;
  3772. +
  3773. if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
  3774. err = -ENOMEM;
  3775. goto err_out;
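
Two things happen in the mce.c hunks: the per-CPU polling timer becomes an hrtimer whose callback re-arms itself with hrtimer_forward_now(), and user-visible notification work is pushed through the RT "simple work" (swork) layer so mce_notify_irq() never takes sleeping locks from its atomic callers. A minimal sketch of the self-rearming hrtimer half, assuming only the standard 4.1 hrtimer/ktime API (names are illustrative):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer poll_timer;

static enum hrtimer_restart poll_fn(struct hrtimer *t)
{
        /* ... poll the hardware here ... */
        hrtimer_forward_now(t, ms_to_ktime(1000));      /* re-arm one period out */
        return HRTIMER_RESTART;
}

static void poll_start(void)
{
        hrtimer_init(&poll_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        poll_timer.function = poll_fn;
        hrtimer_start(&poll_timer, ms_to_ktime(1000), HRTIMER_MODE_REL);
}
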
  3776. diff -Nur linux-4.1.39.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c linux-4.1.39/arch/x86/kernel/cpu/perf_event_intel_rapl.c
  3777. --- linux-4.1.39.orig/arch/x86/kernel/cpu/perf_event_intel_rapl.c 2017-03-13 21:04:36.000000000 +0100
  3778. +++ linux-4.1.39/arch/x86/kernel/cpu/perf_event_intel_rapl.c 2017-04-18 17:56:30.569395424 +0200
  3779. @@ -119,7 +119,7 @@
  3780. };
  3781. struct rapl_pmu {
  3782. - spinlock_t lock;
  3783. + raw_spinlock_t lock;
  3784. int n_active; /* number of active events */
  3785. struct list_head active_list;
  3786. struct pmu *pmu; /* pointer to rapl_pmu_class */
  3787. @@ -223,13 +223,13 @@
  3788. if (!pmu->n_active)
  3789. return HRTIMER_NORESTART;
  3790. - spin_lock_irqsave(&pmu->lock, flags);
  3791. + raw_spin_lock_irqsave(&pmu->lock, flags);
  3792. list_for_each_entry(event, &pmu->active_list, active_entry) {
  3793. rapl_event_update(event);
  3794. }
  3795. - spin_unlock_irqrestore(&pmu->lock, flags);
  3796. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  3797. hrtimer_forward_now(hrtimer, pmu->timer_interval);
  3798. @@ -266,9 +266,9 @@
  3799. struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
  3800. unsigned long flags;
  3801. - spin_lock_irqsave(&pmu->lock, flags);
  3802. + raw_spin_lock_irqsave(&pmu->lock, flags);
  3803. __rapl_pmu_event_start(pmu, event);
  3804. - spin_unlock_irqrestore(&pmu->lock, flags);
  3805. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  3806. }
  3807. static void rapl_pmu_event_stop(struct perf_event *event, int mode)
  3808. @@ -277,7 +277,7 @@
  3809. struct hw_perf_event *hwc = &event->hw;
  3810. unsigned long flags;
  3811. - spin_lock_irqsave(&pmu->lock, flags);
  3812. + raw_spin_lock_irqsave(&pmu->lock, flags);
  3813. /* mark event as deactivated and stopped */
  3814. if (!(hwc->state & PERF_HES_STOPPED)) {
  3815. @@ -302,7 +302,7 @@
  3816. hwc->state |= PERF_HES_UPTODATE;
  3817. }
  3818. - spin_unlock_irqrestore(&pmu->lock, flags);
  3819. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  3820. }
  3821. static int rapl_pmu_event_add(struct perf_event *event, int mode)
  3822. @@ -311,14 +311,14 @@
  3823. struct hw_perf_event *hwc = &event->hw;
  3824. unsigned long flags;
  3825. - spin_lock_irqsave(&pmu->lock, flags);
  3826. + raw_spin_lock_irqsave(&pmu->lock, flags);
  3827. hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
  3828. if (mode & PERF_EF_START)
  3829. __rapl_pmu_event_start(pmu, event);
  3830. - spin_unlock_irqrestore(&pmu->lock, flags);
  3831. + raw_spin_unlock_irqrestore(&pmu->lock, flags);
  3832. return 0;
  3833. }
  3834. @@ -594,7 +594,7 @@
  3835. pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
  3836. if (!pmu)
  3837. return -1;
  3838. - spin_lock_init(&pmu->lock);
  3839. + raw_spin_lock_init(&pmu->lock);
  3840. INIT_LIST_HEAD(&pmu->active_list);
  3841. diff -Nur linux-4.1.39.orig/arch/x86/kernel/dumpstack_32.c linux-4.1.39/arch/x86/kernel/dumpstack_32.c
  3842. --- linux-4.1.39.orig/arch/x86/kernel/dumpstack_32.c 2017-03-13 21:04:36.000000000 +0100
  3843. +++ linux-4.1.39/arch/x86/kernel/dumpstack_32.c 2017-04-18 17:56:30.569395424 +0200
  3844. @@ -42,7 +42,7 @@
  3845. unsigned long *stack, unsigned long bp,
  3846. const struct stacktrace_ops *ops, void *data)
  3847. {
  3848. - const unsigned cpu = get_cpu();
  3849. + const unsigned cpu = get_cpu_light();
  3850. int graph = 0;
  3851. u32 *prev_esp;
  3852. @@ -86,7 +86,7 @@
  3853. break;
  3854. touch_nmi_watchdog();
  3855. }
  3856. - put_cpu();
  3857. + put_cpu_light();
  3858. }
  3859. EXPORT_SYMBOL(dump_trace);
  3860. diff -Nur linux-4.1.39.orig/arch/x86/kernel/dumpstack_64.c linux-4.1.39/arch/x86/kernel/dumpstack_64.c
  3861. --- linux-4.1.39.orig/arch/x86/kernel/dumpstack_64.c 2017-03-13 21:04:36.000000000 +0100
  3862. +++ linux-4.1.39/arch/x86/kernel/dumpstack_64.c 2017-04-18 17:56:30.569395424 +0200
  3863. @@ -152,7 +152,7 @@
  3864. unsigned long *stack, unsigned long bp,
  3865. const struct stacktrace_ops *ops, void *data)
  3866. {
  3867. - const unsigned cpu = get_cpu();
  3868. + const unsigned cpu = get_cpu_light();
  3869. struct thread_info *tinfo;
  3870. unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
  3871. unsigned long dummy;
  3872. @@ -241,7 +241,7 @@
  3873. * This handles the process stack:
  3874. */
  3875. bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
  3876. - put_cpu();
  3877. + put_cpu_light();
  3878. }
  3879. EXPORT_SYMBOL(dump_trace);
  3880. @@ -255,7 +255,7 @@
  3881. int cpu;
  3882. int i;
  3883. - preempt_disable();
  3884. + migrate_disable();
  3885. cpu = smp_processor_id();
  3886. irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
  3887. @@ -291,7 +291,7 @@
  3888. pr_cont(" %016lx", *stack++);
  3889. touch_nmi_watchdog();
  3890. }
  3891. - preempt_enable();
  3892. + migrate_enable();
  3893. pr_cont("\n");
  3894. show_trace_log_lvl(task, regs, sp, bp, log_lvl);
  3895. diff -Nur linux-4.1.39.orig/arch/x86/kernel/entry_32.S linux-4.1.39/arch/x86/kernel/entry_32.S
  3896. --- linux-4.1.39.orig/arch/x86/kernel/entry_32.S 2017-03-13 21:04:36.000000000 +0100
  3897. +++ linux-4.1.39/arch/x86/kernel/entry_32.S 2017-04-18 17:56:30.569395424 +0200
  3898. @@ -359,8 +359,24 @@
  3899. ENTRY(resume_kernel)
  3900. DISABLE_INTERRUPTS(CLBR_ANY)
  3901. need_resched:
  3902. + # preempt count == 0 + NEED_RS set?
  3903. cmpl $0,PER_CPU_VAR(__preempt_count)
  3904. +#ifndef CONFIG_PREEMPT_LAZY
  3905. jnz restore_all
  3906. +#else
  3907. + jz test_int_off
  3908. +
3909. + # at least preempt count == 0 ?
  3910. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  3911. + jne restore_all
  3912. +
  3913. + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
  3914. + jnz restore_all
  3915. +
  3916. + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp)
  3917. + jz restore_all
  3918. +test_int_off:
  3919. +#endif
  3920. testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
  3921. jz restore_all
  3922. call preempt_schedule_irq
  3923. @@ -594,7 +610,7 @@
  3924. ALIGN
  3925. RING0_PTREGS_FRAME # can't unwind into user space anyway
  3926. work_pending:
  3927. - testb $_TIF_NEED_RESCHED, %cl
  3928. + testl $_TIF_NEED_RESCHED_MASK, %ecx
  3929. jz work_notifysig
  3930. work_resched:
  3931. call schedule
  3932. @@ -607,7 +623,7 @@
  3933. andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
  3934. # than syscall tracing?
  3935. jz restore_all
  3936. - testb $_TIF_NEED_RESCHED, %cl
  3937. + testl $_TIF_NEED_RESCHED_MASK, %ecx
  3938. jnz work_resched
  3939. work_notifysig: # deal with pending signals and
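
The assembly added above (and the matching entry_64.S hunk below) gates the call to preempt_schedule_irq() on the lazy-preemption state. Rendered as C purely for readability (not part of the patch; names mirror the assembly, and on x86 the per-CPU __preempt_count carries the inverted NEED_RESCHED bit, so a raw value of 0 means "count zero and reschedule needed"):

static bool irq_exit_should_preempt(u32 raw_preempt_count, struct thread_info *ti)
{
        if (raw_preempt_count == 0)             /* count 0 + NEED_RESCHED set */
                return true;

        /* Lazy path: preemption must be fully enabled, no lazy sections
         * held, and the lazy flag must be pending. */
        if (raw_preempt_count != PREEMPT_ENABLED)
                return false;
        if (ti->preempt_lazy_count)
                return false;
        return ti->flags & _TIF_NEED_RESCHED_LAZY;
}
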
  3940. diff -Nur linux-4.1.39.orig/arch/x86/kernel/entry_64.S linux-4.1.39/arch/x86/kernel/entry_64.S
  3941. --- linux-4.1.39.orig/arch/x86/kernel/entry_64.S 2017-03-13 21:04:36.000000000 +0100
  3942. +++ linux-4.1.39/arch/x86/kernel/entry_64.S 2017-04-18 17:56:30.569395424 +0200
  3943. @@ -370,8 +370,8 @@
  3944. /* First do a reschedule test. */
  3945. /* edx: work, edi: workmask */
  3946. int_careful:
  3947. - bt $TIF_NEED_RESCHED,%edx
  3948. - jnc int_very_careful
  3949. + testl $_TIF_NEED_RESCHED_MASK,%edx
  3950. + jz int_very_careful
  3951. TRACE_IRQS_ON
  3952. ENABLE_INTERRUPTS(CLBR_NONE)
  3953. pushq_cfi %rdi
  3954. @@ -776,7 +776,23 @@
  3955. bt $9,EFLAGS(%rsp) /* interrupts were off? */
  3956. jnc 1f
  3957. 0: cmpl $0,PER_CPU_VAR(__preempt_count)
  3958. +#ifndef CONFIG_PREEMPT_LAZY
  3959. jnz 1f
  3960. +#else
  3961. + jz do_preempt_schedule_irq
  3962. +
3963. + # at least preempt count == 0 ?
  3964. + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
  3965. + jnz 1f
  3966. +
  3967. + GET_THREAD_INFO(%rcx)
  3968. + cmpl $0, TI_preempt_lazy_count(%rcx)
  3969. + jnz 1f
  3970. +
  3971. + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
  3972. + jnc 1f
  3973. +do_preempt_schedule_irq:
  3974. +#endif
  3975. call preempt_schedule_irq
  3976. jmp 0b
  3977. 1:
  3978. @@ -844,8 +860,8 @@
  3979. /* edi: workmask, edx: work */
  3980. retint_careful:
  3981. CFI_RESTORE_STATE
  3982. - bt $TIF_NEED_RESCHED,%edx
  3983. - jnc retint_signal
  3984. + testl $_TIF_NEED_RESCHED_MASK,%edx
  3985. + jz retint_signal
  3986. TRACE_IRQS_ON
  3987. ENABLE_INTERRUPTS(CLBR_NONE)
  3988. pushq_cfi %rdi
  3989. @@ -1118,6 +1134,7 @@
  3990. jmp 2b
  3991. .previous
  3992. +#ifndef CONFIG_PREEMPT_RT_FULL
  3993. /* Call softirq on interrupt stack. Interrupts are off. */
  3994. ENTRY(do_softirq_own_stack)
  3995. CFI_STARTPROC
  3996. @@ -1137,6 +1154,7 @@
  3997. ret
  3998. CFI_ENDPROC
  3999. END(do_softirq_own_stack)
  4000. +#endif
  4001. #ifdef CONFIG_XEN
  4002. idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
  4003. diff -Nur linux-4.1.39.orig/arch/x86/kernel/irq_32.c linux-4.1.39/arch/x86/kernel/irq_32.c
  4004. --- linux-4.1.39.orig/arch/x86/kernel/irq_32.c 2017-03-13 21:04:36.000000000 +0100
  4005. +++ linux-4.1.39/arch/x86/kernel/irq_32.c 2017-04-18 17:56:30.569395424 +0200
  4006. @@ -135,6 +135,7 @@
  4007. cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
  4008. }
  4009. +#ifndef CONFIG_PREEMPT_RT_FULL
  4010. void do_softirq_own_stack(void)
  4011. {
  4012. struct thread_info *curstk;
  4013. @@ -153,6 +154,7 @@
  4014. call_on_stack(__do_softirq, isp);
  4015. }
  4016. +#endif
  4017. bool handle_irq(unsigned irq, struct pt_regs *regs)
  4018. {
  4019. diff -Nur linux-4.1.39.orig/arch/x86/kernel/kvm.c linux-4.1.39/arch/x86/kernel/kvm.c
  4020. --- linux-4.1.39.orig/arch/x86/kernel/kvm.c 2017-03-13 21:04:36.000000000 +0100
  4021. +++ linux-4.1.39/arch/x86/kernel/kvm.c 2017-04-18 17:56:30.569395424 +0200
  4022. @@ -36,6 +36,7 @@
  4023. #include <linux/kprobes.h>
  4024. #include <linux/debugfs.h>
  4025. #include <linux/nmi.h>
  4026. +#include <linux/wait-simple.h>
  4027. #include <asm/timer.h>
  4028. #include <asm/cpu.h>
  4029. #include <asm/traps.h>
  4030. @@ -91,14 +92,14 @@
  4031. struct kvm_task_sleep_node {
  4032. struct hlist_node link;
  4033. - wait_queue_head_t wq;
  4034. + struct swait_head wq;
  4035. u32 token;
  4036. int cpu;
  4037. bool halted;
  4038. };
  4039. static struct kvm_task_sleep_head {
  4040. - spinlock_t lock;
  4041. + raw_spinlock_t lock;
  4042. struct hlist_head list;
  4043. } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
  4044. @@ -122,17 +123,17 @@
  4045. u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
  4046. struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
  4047. struct kvm_task_sleep_node n, *e;
  4048. - DEFINE_WAIT(wait);
  4049. + DEFINE_SWAITER(wait);
  4050. rcu_irq_enter();
  4051. - spin_lock(&b->lock);
  4052. + raw_spin_lock(&b->lock);
  4053. e = _find_apf_task(b, token);
  4054. if (e) {
  4055. /* dummy entry exist -> wake up was delivered ahead of PF */
  4056. hlist_del(&e->link);
  4057. kfree(e);
  4058. - spin_unlock(&b->lock);
  4059. + raw_spin_unlock(&b->lock);
  4060. rcu_irq_exit();
  4061. return;
  4062. @@ -141,13 +142,13 @@
  4063. n.token = token;
  4064. n.cpu = smp_processor_id();
  4065. n.halted = is_idle_task(current) || preempt_count() > 1;
  4066. - init_waitqueue_head(&n.wq);
  4067. + init_swait_head(&n.wq);
  4068. hlist_add_head(&n.link, &b->list);
  4069. - spin_unlock(&b->lock);
  4070. + raw_spin_unlock(&b->lock);
  4071. for (;;) {
  4072. if (!n.halted)
  4073. - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
  4074. + swait_prepare(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
  4075. if (hlist_unhashed(&n.link))
  4076. break;
  4077. @@ -166,7 +167,7 @@
  4078. }
  4079. }
  4080. if (!n.halted)
  4081. - finish_wait(&n.wq, &wait);
  4082. + swait_finish(&n.wq, &wait);
  4083. rcu_irq_exit();
  4084. return;
  4085. @@ -178,8 +179,8 @@
  4086. hlist_del_init(&n->link);
  4087. if (n->halted)
  4088. smp_send_reschedule(n->cpu);
  4089. - else if (waitqueue_active(&n->wq))
  4090. - wake_up(&n->wq);
  4091. + else if (swaitqueue_active(&n->wq))
  4092. + swait_wake(&n->wq);
  4093. }
  4094. static void apf_task_wake_all(void)
  4095. @@ -189,14 +190,14 @@
  4096. for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
  4097. struct hlist_node *p, *next;
  4098. struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
  4099. - spin_lock(&b->lock);
  4100. + raw_spin_lock(&b->lock);
  4101. hlist_for_each_safe(p, next, &b->list) {
  4102. struct kvm_task_sleep_node *n =
  4103. hlist_entry(p, typeof(*n), link);
  4104. if (n->cpu == smp_processor_id())
  4105. apf_task_wake_one(n);
  4106. }
  4107. - spin_unlock(&b->lock);
  4108. + raw_spin_unlock(&b->lock);
  4109. }
  4110. }
  4111. @@ -212,7 +213,7 @@
  4112. }
  4113. again:
  4114. - spin_lock(&b->lock);
  4115. + raw_spin_lock(&b->lock);
  4116. n = _find_apf_task(b, token);
  4117. if (!n) {
  4118. /*
  4119. @@ -225,17 +226,17 @@
  4120. * Allocation failed! Busy wait while other cpu
  4121. * handles async PF.
  4122. */
  4123. - spin_unlock(&b->lock);
  4124. + raw_spin_unlock(&b->lock);
  4125. cpu_relax();
  4126. goto again;
  4127. }
  4128. n->token = token;
  4129. n->cpu = smp_processor_id();
  4130. - init_waitqueue_head(&n->wq);
  4131. + init_swait_head(&n->wq);
  4132. hlist_add_head(&n->link, &b->list);
  4133. } else
  4134. apf_task_wake_one(n);
  4135. - spin_unlock(&b->lock);
  4136. + raw_spin_unlock(&b->lock);
  4137. return;
  4138. }
  4139. EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
  4140. @@ -486,7 +487,7 @@
  4141. paravirt_ops_setup();
  4142. register_reboot_notifier(&kvm_pv_reboot_nb);
  4143. for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
  4144. - spin_lock_init(&async_pf_sleepers[i].lock);
  4145. + raw_spin_lock_init(&async_pf_sleepers[i].lock);
  4146. if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
  4147. x86_init.irqs.trap_init = kvm_apf_trap_init;
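
The async-PF buckets switch from spinlock_t to raw_spinlock_t and from regular waitqueues to RT's simple waitqueues (wait-simple.h), whose wake path does not take a sleeping lock and is therefore safe from the contexts async PF runs in. A minimal sketch of that interface as it is used above (this is the RT wait-simple API from this patch, not the later mainline swait API):

#include <linux/wait-simple.h>
#include <linux/sched.h>

static struct swait_head ready_wq;
static bool ready;

static void wait_side_init(void)
{
        init_swait_head(&ready_wq);
}

static void wait_for_ready(void)
{
        DEFINE_SWAITER(wait);

        for (;;) {
                swait_prepare(&ready_wq, &wait, TASK_UNINTERRUPTIBLE);
                if (ready)
                        break;
                schedule();
        }
        swait_finish(&ready_wq, &wait);
}

static void mark_ready(void)
{
        ready = true;
        swait_wake(&ready_wq);          /* wake path uses a raw lock */
}
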
  4148. diff -Nur linux-4.1.39.orig/arch/x86/kernel/process_32.c linux-4.1.39/arch/x86/kernel/process_32.c
  4149. --- linux-4.1.39.orig/arch/x86/kernel/process_32.c 2017-03-13 21:04:36.000000000 +0100
  4150. +++ linux-4.1.39/arch/x86/kernel/process_32.c 2017-04-18 17:56:30.569395424 +0200
  4151. @@ -35,6 +35,7 @@
  4152. #include <linux/uaccess.h>
  4153. #include <linux/io.h>
  4154. #include <linux/kdebug.h>
  4155. +#include <linux/highmem.h>
  4156. #include <asm/pgtable.h>
  4157. #include <asm/ldt.h>
  4158. @@ -210,6 +211,35 @@
  4159. }
  4160. EXPORT_SYMBOL_GPL(start_thread);
  4161. +#ifdef CONFIG_PREEMPT_RT_FULL
  4162. +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
  4163. +{
  4164. + int i;
  4165. +
  4166. + /*
  4167. + * Clear @prev's kmap_atomic mappings
  4168. + */
  4169. + for (i = 0; i < prev_p->kmap_idx; i++) {
  4170. + int idx = i + KM_TYPE_NR * smp_processor_id();
  4171. + pte_t *ptep = kmap_pte - idx;
  4172. +
  4173. + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
  4174. + }
  4175. + /*
  4176. + * Restore @next_p's kmap_atomic mappings
  4177. + */
  4178. + for (i = 0; i < next_p->kmap_idx; i++) {
  4179. + int idx = i + KM_TYPE_NR * smp_processor_id();
  4180. +
  4181. + if (!pte_none(next_p->kmap_pte[i]))
  4182. + set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
  4183. + }
  4184. +}
  4185. +#else
  4186. +static inline void
  4187. +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
  4188. +#endif
  4189. +
  4190. /*
  4191. * switch_to(x,y) should switch tasks from x to y.
  4192. @@ -292,6 +322,8 @@
  4193. task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
  4194. __switch_to_xtra(prev_p, next_p, tss);
  4195. + switch_kmaps(prev_p, next_p);
  4196. +
  4197. /*
  4198. * Leave lazy mode, flushing any hypercalls made here.
  4199. * This must be done before restoring TLS segments so
  4200. diff -Nur linux-4.1.39.orig/arch/x86/kernel/signal.c linux-4.1.39/arch/x86/kernel/signal.c
  4201. --- linux-4.1.39.orig/arch/x86/kernel/signal.c 2017-03-13 21:04:36.000000000 +0100
  4202. +++ linux-4.1.39/arch/x86/kernel/signal.c 2017-04-18 17:56:30.569395424 +0200
  4203. @@ -726,6 +726,14 @@
  4204. {
  4205. user_exit();
  4206. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  4207. + if (unlikely(current->forced_info.si_signo)) {
  4208. + struct task_struct *t = current;
  4209. + force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
  4210. + t->forced_info.si_signo = 0;
  4211. + }
  4212. +#endif
  4213. +
  4214. if (thread_info_flags & _TIF_UPROBE)
  4215. uprobe_notify_resume(regs);
  4216. diff -Nur linux-4.1.39.orig/arch/x86/kvm/lapic.c linux-4.1.39/arch/x86/kvm/lapic.c
  4217. --- linux-4.1.39.orig/arch/x86/kvm/lapic.c 2017-03-13 21:04:36.000000000 +0100
  4218. +++ linux-4.1.39/arch/x86/kvm/lapic.c 2017-04-18 17:56:30.569395424 +0200
  4219. @@ -1106,7 +1106,7 @@
  4220. static void apic_timer_expired(struct kvm_lapic *apic)
  4221. {
  4222. struct kvm_vcpu *vcpu = apic->vcpu;
  4223. - wait_queue_head_t *q = &vcpu->wq;
  4224. + struct swait_head *q = &vcpu->wq;
  4225. struct kvm_timer *ktimer = &apic->lapic_timer;
  4226. if (atomic_read(&apic->lapic_timer.pending))
  4227. @@ -1115,8 +1115,8 @@
  4228. atomic_inc(&apic->lapic_timer.pending);
  4229. kvm_set_pending_timer(vcpu);
  4230. - if (waitqueue_active(q))
  4231. - wake_up_interruptible(q);
  4232. + if (swaitqueue_active(q))
  4233. + swait_wake_interruptible(q);
  4234. if (apic_lvtt_tscdeadline(apic))
  4235. ktimer->expired_tscdeadline = ktimer->tscdeadline;
  4236. @@ -1169,8 +1169,36 @@
  4237. __delay(tsc_deadline - guest_tsc);
  4238. }
  4239. +static enum hrtimer_restart apic_timer_fn(struct hrtimer *data);
  4240. +
  4241. +static void __apic_timer_expired(struct hrtimer *data)
  4242. +{
  4243. + int ret, i = 0;
  4244. + enum hrtimer_restart r;
  4245. + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
  4246. +
  4247. + r = apic_timer_fn(data);
  4248. +
  4249. + if (r == HRTIMER_RESTART) {
  4250. + do {
  4251. + ret = hrtimer_start_expires(data, HRTIMER_MODE_ABS);
  4252. + if (ret == -ETIME)
  4253. + hrtimer_add_expires_ns(&ktimer->timer,
  4254. + ktimer->period);
  4255. + i++;
  4256. + } while (ret == -ETIME && i < 10);
  4257. +
  4258. + if (ret == -ETIME) {
  4259. + printk_once(KERN_ERR "%s: failed to reprogram timer\n",
  4260. + __func__);
  4261. + WARN_ON_ONCE(1);
  4262. + }
  4263. + }
  4264. +}
  4265. +
  4266. static void start_apic_timer(struct kvm_lapic *apic)
  4267. {
  4268. + int ret;
  4269. ktime_t now;
  4270. atomic_set(&apic->lapic_timer.pending, 0);
  4271. @@ -1201,9 +1229,11 @@
  4272. }
  4273. }
  4274. - hrtimer_start(&apic->lapic_timer.timer,
  4275. + ret = hrtimer_start(&apic->lapic_timer.timer,
  4276. ktime_add_ns(now, apic->lapic_timer.period),
  4277. HRTIMER_MODE_ABS);
  4278. + if (ret == -ETIME)
  4279. + __apic_timer_expired(&apic->lapic_timer.timer);
  4280. apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
  4281. PRIx64 ", "
  4282. @@ -1235,8 +1265,10 @@
  4283. do_div(ns, this_tsc_khz);
  4284. expire = ktime_add_ns(now, ns);
  4285. expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
  4286. - hrtimer_start(&apic->lapic_timer.timer,
  4287. + ret = hrtimer_start(&apic->lapic_timer.timer,
  4288. expire, HRTIMER_MODE_ABS);
  4289. + if (ret == -ETIME)
  4290. + __apic_timer_expired(&apic->lapic_timer.timer);
  4291. } else
  4292. apic_timer_expired(apic);
  4293. @@ -1709,6 +1741,7 @@
  4294. hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
  4295. HRTIMER_MODE_ABS);
  4296. apic->lapic_timer.timer.function = apic_timer_fn;
  4297. + apic->lapic_timer.timer.irqsafe = 1;
  4298. /*
  4299. * APIC is created enabled. This will prevent kvm_lapic_set_base from
  4300. @@ -1836,7 +1869,8 @@
  4301. timer = &vcpu->arch.apic->lapic_timer.timer;
  4302. if (hrtimer_cancel(timer))
  4303. - hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  4304. + if (hrtimer_start_expires(timer, HRTIMER_MODE_ABS) == -ETIME)
  4305. + __apic_timer_expired(timer);
  4306. }
  4307. /*
  4308. diff -Nur linux-4.1.39.orig/arch/x86/kvm/x86.c linux-4.1.39/arch/x86/kvm/x86.c
  4309. --- linux-4.1.39.orig/arch/x86/kvm/x86.c 2017-03-13 21:04:36.000000000 +0100
  4310. +++ linux-4.1.39/arch/x86/kvm/x86.c 2017-04-18 17:56:30.569395424 +0200
  4311. @@ -5837,6 +5837,13 @@
  4312. goto out;
  4313. }
  4314. +#ifdef CONFIG_PREEMPT_RT_FULL
  4315. + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  4316. + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
  4317. + return -EOPNOTSUPP;
  4318. + }
  4319. +#endif
  4320. +
  4321. r = kvm_mmu_module_init();
  4322. if (r)
  4323. goto out_free_percpu;
  4324. diff -Nur linux-4.1.39.orig/arch/x86/lib/usercopy_32.c linux-4.1.39/arch/x86/lib/usercopy_32.c
  4325. --- linux-4.1.39.orig/arch/x86/lib/usercopy_32.c 2017-03-13 21:04:36.000000000 +0100
  4326. +++ linux-4.1.39/arch/x86/lib/usercopy_32.c 2017-04-18 17:56:30.573395579 +0200
  4327. @@ -647,7 +647,8 @@
  4328. * @from: Source address, in kernel space.
  4329. * @n: Number of bytes to copy.
  4330. *
  4331. - * Context: User context only. This function may sleep.
  4332. + * Context: User context only. This function may sleep if pagefaults are
  4333. + * enabled.
  4334. *
  4335. * Copy data from kernel space to user space.
  4336. *
  4337. @@ -668,7 +669,8 @@
  4338. * @from: Source address, in user space.
  4339. * @n: Number of bytes to copy.
  4340. *
  4341. - * Context: User context only. This function may sleep.
  4342. + * Context: User context only. This function may sleep if pagefaults are
  4343. + * enabled.
  4344. *
  4345. * Copy data from user space to kernel space.
  4346. *
  4347. diff -Nur linux-4.1.39.orig/arch/x86/mm/fault.c linux-4.1.39/arch/x86/mm/fault.c
  4348. --- linux-4.1.39.orig/arch/x86/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  4349. +++ linux-4.1.39/arch/x86/mm/fault.c 2017-04-18 17:56:30.573395579 +0200
  4350. @@ -13,6 +13,7 @@
  4351. #include <linux/hugetlb.h> /* hstate_index_to_shift */
  4352. #include <linux/prefetch.h> /* prefetchw */
  4353. #include <linux/context_tracking.h> /* exception_enter(), ... */
  4354. +#include <linux/uaccess.h> /* faulthandler_disabled() */
  4355. #include <asm/traps.h> /* dotraplinkage, ... */
  4356. #include <asm/pgalloc.h> /* pgd_*(), ... */
  4357. @@ -1133,9 +1134,9 @@
  4358. /*
  4359. * If we're in an interrupt, have no user context or are running
  4360. - * in an atomic region then we must not take the fault:
  4361. + * in a region with pagefaults disabled then we must not take the fault
  4362. */
  4363. - if (unlikely(in_atomic() || !mm)) {
  4364. + if (unlikely(faulthandler_disabled() || !mm)) {
  4365. bad_area_nosemaphore(regs, error_code, address);
  4366. return;
  4367. }
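
With RT, pagefault_disable() no longer implies !preemptible, so the fault handler cannot rely on in_atomic() to detect "may not fault" sections; faulthandler_disabled() checks the explicit pagefault-disable state (plus genuine atomic context) instead. Callers keep the usual bracket, sketched here as an assumption-light example:

#include <linux/uaccess.h>

/* Read one word from user space without being allowed to service a fault:
 * inside the bracket faulthandler_disabled() is true, so a missing page
 * makes the fixup path return -EFAULT instead of faulting in the page. */
static int peek_user_word(const int __user *p, int *val)
{
        int ret;

        pagefault_disable();
        ret = __get_user(*val, p);
        pagefault_enable();
        return ret;
}
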
  4368. diff -Nur linux-4.1.39.orig/arch/x86/mm/highmem_32.c linux-4.1.39/arch/x86/mm/highmem_32.c
  4369. --- linux-4.1.39.orig/arch/x86/mm/highmem_32.c 2017-03-13 21:04:36.000000000 +0100
  4370. +++ linux-4.1.39/arch/x86/mm/highmem_32.c 2017-04-18 17:56:30.573395579 +0200
  4371. @@ -32,10 +32,11 @@
  4372. */
  4373. void *kmap_atomic_prot(struct page *page, pgprot_t prot)
  4374. {
  4375. + pte_t pte = mk_pte(page, prot);
  4376. unsigned long vaddr;
  4377. int idx, type;
  4378. - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
  4379. + preempt_disable_nort();
  4380. pagefault_disable();
  4381. if (!PageHighMem(page))
  4382. @@ -45,7 +46,10 @@
  4383. idx = type + KM_TYPE_NR*smp_processor_id();
  4384. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  4385. BUG_ON(!pte_none(*(kmap_pte-idx)));
  4386. - set_pte(kmap_pte-idx, mk_pte(page, prot));
  4387. +#ifdef CONFIG_PREEMPT_RT_FULL
  4388. + current->kmap_pte[type] = pte;
  4389. +#endif
  4390. + set_pte(kmap_pte-idx, pte);
  4391. arch_flush_lazy_mmu_mode();
  4392. return (void *)vaddr;
  4393. @@ -88,6 +92,9 @@
  4394. * is a bad idea also, in case the page changes cacheability
  4395. * attributes or becomes a protected page in a hypervisor.
  4396. */
  4397. +#ifdef CONFIG_PREEMPT_RT_FULL
  4398. + current->kmap_pte[type] = __pte(0);
  4399. +#endif
  4400. kpte_clear_flush(kmap_pte-idx, vaddr);
  4401. kmap_atomic_idx_pop();
  4402. arch_flush_lazy_mmu_mode();
  4403. @@ -100,6 +107,7 @@
  4404. #endif
  4405. pagefault_enable();
  4406. + preempt_enable_nort();
  4407. }
  4408. EXPORT_SYMBOL(__kunmap_atomic);
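
kmap_atomic() stays preemptible on RT: preempt_disable_nort() is a no-op there, and each slot's pte is mirrored into current->kmap_pte[] so the switch_kmaps() helper added to process_32.c above can tear down and rebuild the fixmap slots across a context switch (the iomap_32.c hunk below follows the same scheme). A sketch of the assumed *_nort behaviour, roughly how the RT tree defines these helpers:

#ifdef CONFIG_PREEMPT_RT_FULL
# define preempt_disable_nort()         barrier()       /* stay preemptible on RT */
# define preempt_enable_nort()          barrier()
#else
# define preempt_disable_nort()         preempt_disable()
# define preempt_enable_nort()          preempt_enable()
#endif
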
  4409. diff -Nur linux-4.1.39.orig/arch/x86/mm/iomap_32.c linux-4.1.39/arch/x86/mm/iomap_32.c
  4410. --- linux-4.1.39.orig/arch/x86/mm/iomap_32.c 2017-03-13 21:04:36.000000000 +0100
  4411. +++ linux-4.1.39/arch/x86/mm/iomap_32.c 2017-04-18 17:56:30.573395579 +0200
  4412. @@ -56,15 +56,22 @@
  4413. void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
  4414. {
  4415. + pte_t pte = pfn_pte(pfn, prot);
  4416. unsigned long vaddr;
  4417. int idx, type;
  4418. + preempt_disable();
  4419. pagefault_disable();
  4420. type = kmap_atomic_idx_push();
  4421. idx = type + KM_TYPE_NR * smp_processor_id();
  4422. vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
  4423. - set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
  4424. + WARN_ON(!pte_none(*(kmap_pte - idx)));
  4425. +
  4426. +#ifdef CONFIG_PREEMPT_RT_FULL
  4427. + current->kmap_pte[type] = pte;
  4428. +#endif
  4429. + set_pte(kmap_pte - idx, pte);
  4430. arch_flush_lazy_mmu_mode();
  4431. return (void *)vaddr;
  4432. @@ -112,10 +119,14 @@
  4433. * is a bad idea also, in case the page changes cacheability
  4434. * attributes or becomes a protected page in a hypervisor.
  4435. */
  4436. +#ifdef CONFIG_PREEMPT_RT_FULL
  4437. + current->kmap_pte[type] = __pte(0);
  4438. +#endif
  4439. kpte_clear_flush(kmap_pte-idx, vaddr);
  4440. kmap_atomic_idx_pop();
  4441. }
  4442. pagefault_enable();
  4443. + preempt_enable();
  4444. }
  4445. EXPORT_SYMBOL_GPL(iounmap_atomic);
  4446. diff -Nur linux-4.1.39.orig/arch/x86/mm/pageattr.c linux-4.1.39/arch/x86/mm/pageattr.c
  4447. --- linux-4.1.39.orig/arch/x86/mm/pageattr.c 2017-03-13 21:04:36.000000000 +0100
  4448. +++ linux-4.1.39/arch/x86/mm/pageattr.c 2017-04-18 17:56:30.573395579 +0200
  4449. @@ -209,7 +209,15 @@
  4450. int in_flags, struct page **pages)
  4451. {
  4452. unsigned int i, level;
  4453. +#ifdef CONFIG_PREEMPT
  4454. + /*
  4455. + * Avoid wbinvd() because it causes latencies on all CPUs,
  4456. + * regardless of any CPU isolation that may be in effect.
  4457. + */
  4458. + unsigned long do_wbinvd = 0;
  4459. +#else
  4460. unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
  4461. +#endif
  4462. BUG_ON(irqs_disabled());
  4463. diff -Nur linux-4.1.39.orig/arch/x86/platform/uv/tlb_uv.c linux-4.1.39/arch/x86/platform/uv/tlb_uv.c
  4464. --- linux-4.1.39.orig/arch/x86/platform/uv/tlb_uv.c 2017-03-13 21:04:36.000000000 +0100
  4465. +++ linux-4.1.39/arch/x86/platform/uv/tlb_uv.c 2017-04-18 17:56:30.573395579 +0200
  4466. @@ -714,9 +714,9 @@
  4467. quiesce_local_uvhub(hmaster);
  4468. - spin_lock(&hmaster->queue_lock);
  4469. + raw_spin_lock(&hmaster->queue_lock);
  4470. reset_with_ipi(&bau_desc->distribution, bcp);
  4471. - spin_unlock(&hmaster->queue_lock);
  4472. + raw_spin_unlock(&hmaster->queue_lock);
  4473. end_uvhub_quiesce(hmaster);
  4474. @@ -736,9 +736,9 @@
  4475. quiesce_local_uvhub(hmaster);
  4476. - spin_lock(&hmaster->queue_lock);
  4477. + raw_spin_lock(&hmaster->queue_lock);
  4478. reset_with_ipi(&bau_desc->distribution, bcp);
  4479. - spin_unlock(&hmaster->queue_lock);
  4480. + raw_spin_unlock(&hmaster->queue_lock);
  4481. end_uvhub_quiesce(hmaster);
  4482. @@ -759,7 +759,7 @@
  4483. cycles_t tm1;
  4484. hmaster = bcp->uvhub_master;
  4485. - spin_lock(&hmaster->disable_lock);
  4486. + raw_spin_lock(&hmaster->disable_lock);
  4487. if (!bcp->baudisabled) {
  4488. stat->s_bau_disabled++;
  4489. tm1 = get_cycles();
  4490. @@ -772,7 +772,7 @@
  4491. }
  4492. }
  4493. }
  4494. - spin_unlock(&hmaster->disable_lock);
  4495. + raw_spin_unlock(&hmaster->disable_lock);
  4496. }
  4497. static void count_max_concurr(int stat, struct bau_control *bcp,
  4498. @@ -835,7 +835,7 @@
  4499. */
  4500. static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
  4501. {
  4502. - spinlock_t *lock = &hmaster->uvhub_lock;
  4503. + raw_spinlock_t *lock = &hmaster->uvhub_lock;
  4504. atomic_t *v;
  4505. v = &hmaster->active_descriptor_count;
  4506. @@ -968,7 +968,7 @@
  4507. struct bau_control *hmaster;
  4508. hmaster = bcp->uvhub_master;
  4509. - spin_lock(&hmaster->disable_lock);
  4510. + raw_spin_lock(&hmaster->disable_lock);
  4511. if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
  4512. stat->s_bau_reenabled++;
  4513. for_each_present_cpu(tcpu) {
  4514. @@ -980,10 +980,10 @@
  4515. tbcp->period_giveups = 0;
  4516. }
  4517. }
  4518. - spin_unlock(&hmaster->disable_lock);
  4519. + raw_spin_unlock(&hmaster->disable_lock);
  4520. return 0;
  4521. }
  4522. - spin_unlock(&hmaster->disable_lock);
  4523. + raw_spin_unlock(&hmaster->disable_lock);
  4524. return -1;
  4525. }
  4526. @@ -1901,9 +1901,9 @@
  4527. bcp->cong_reps = congested_reps;
  4528. bcp->disabled_period = sec_2_cycles(disabled_period);
  4529. bcp->giveup_limit = giveup_limit;
  4530. - spin_lock_init(&bcp->queue_lock);
  4531. - spin_lock_init(&bcp->uvhub_lock);
  4532. - spin_lock_init(&bcp->disable_lock);
  4533. + raw_spin_lock_init(&bcp->queue_lock);
  4534. + raw_spin_lock_init(&bcp->uvhub_lock);
  4535. + raw_spin_lock_init(&bcp->disable_lock);
  4536. }
  4537. }
  4538. diff -Nur linux-4.1.39.orig/arch/x86/platform/uv/uv_time.c linux-4.1.39/arch/x86/platform/uv/uv_time.c
  4539. --- linux-4.1.39.orig/arch/x86/platform/uv/uv_time.c 2017-03-13 21:04:36.000000000 +0100
  4540. +++ linux-4.1.39/arch/x86/platform/uv/uv_time.c 2017-04-18 17:56:30.573395579 +0200
  4541. @@ -58,7 +58,7 @@
  4542. /* There is one of these allocated per node */
  4543. struct uv_rtc_timer_head {
  4544. - spinlock_t lock;
  4545. + raw_spinlock_t lock;
  4546. /* next cpu waiting for timer, local node relative: */
  4547. int next_cpu;
  4548. /* number of cpus on this node: */
  4549. @@ -178,7 +178,7 @@
  4550. uv_rtc_deallocate_timers();
  4551. return -ENOMEM;
  4552. }
  4553. - spin_lock_init(&head->lock);
  4554. + raw_spin_lock_init(&head->lock);
  4555. head->ncpus = uv_blade_nr_possible_cpus(bid);
  4556. head->next_cpu = -1;
  4557. blade_info[bid] = head;
  4558. @@ -232,7 +232,7 @@
  4559. unsigned long flags;
  4560. int next_cpu;
  4561. - spin_lock_irqsave(&head->lock, flags);
  4562. + raw_spin_lock_irqsave(&head->lock, flags);
  4563. next_cpu = head->next_cpu;
  4564. *t = expires;
  4565. @@ -244,12 +244,12 @@
  4566. if (uv_setup_intr(cpu, expires)) {
  4567. *t = ULLONG_MAX;
  4568. uv_rtc_find_next_timer(head, pnode);
  4569. - spin_unlock_irqrestore(&head->lock, flags);
  4570. + raw_spin_unlock_irqrestore(&head->lock, flags);
  4571. return -ETIME;
  4572. }
  4573. }
  4574. - spin_unlock_irqrestore(&head->lock, flags);
  4575. + raw_spin_unlock_irqrestore(&head->lock, flags);
  4576. return 0;
  4577. }
  4578. @@ -268,7 +268,7 @@
  4579. unsigned long flags;
  4580. int rc = 0;
  4581. - spin_lock_irqsave(&head->lock, flags);
  4582. + raw_spin_lock_irqsave(&head->lock, flags);
  4583. if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
  4584. rc = 1;
  4585. @@ -280,7 +280,7 @@
  4586. uv_rtc_find_next_timer(head, pnode);
  4587. }
  4588. - spin_unlock_irqrestore(&head->lock, flags);
  4589. + raw_spin_unlock_irqrestore(&head->lock, flags);
  4590. return rc;
  4591. }
  4592. @@ -300,13 +300,18 @@
  4593. static cycle_t uv_read_rtc(struct clocksource *cs)
  4594. {
  4595. unsigned long offset;
  4596. + cycle_t cycles;
  4597. + preempt_disable();
  4598. if (uv_get_min_hub_revision_id() == 1)
  4599. offset = 0;
  4600. else
  4601. offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
  4602. - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  4603. + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
  4604. + preempt_enable();
  4605. +
  4606. + return cycles;
  4607. }
  4608. /*
  4609. diff -Nur linux-4.1.39.orig/arch/xtensa/mm/fault.c linux-4.1.39/arch/xtensa/mm/fault.c
  4610. --- linux-4.1.39.orig/arch/xtensa/mm/fault.c 2017-03-13 21:04:36.000000000 +0100
  4611. +++ linux-4.1.39/arch/xtensa/mm/fault.c 2017-04-18 17:56:30.573395579 +0200
  4612. @@ -15,10 +15,10 @@
  4613. #include <linux/mm.h>
  4614. #include <linux/module.h>
  4615. #include <linux/hardirq.h>
  4616. +#include <linux/uaccess.h>
  4617. #include <asm/mmu_context.h>
  4618. #include <asm/cacheflush.h>
  4619. #include <asm/hardirq.h>
  4620. -#include <asm/uaccess.h>
  4621. #include <asm/pgalloc.h>
  4622. DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
  4623. @@ -57,7 +57,7 @@
  4624. /* If we're in an interrupt or have no user
  4625. * context, we must not take the fault..
  4626. */
  4627. - if (in_atomic() || !mm) {
  4628. + if (faulthandler_disabled() || !mm) {
  4629. bad_page_fault(regs, address, SIGSEGV);
  4630. return;
  4631. }
  4632. diff -Nur linux-4.1.39.orig/arch/xtensa/mm/highmem.c linux-4.1.39/arch/xtensa/mm/highmem.c
  4633. --- linux-4.1.39.orig/arch/xtensa/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  4634. +++ linux-4.1.39/arch/xtensa/mm/highmem.c 2017-04-18 17:56:30.573395579 +0200
  4635. @@ -42,6 +42,7 @@
  4636. enum fixed_addresses idx;
  4637. unsigned long vaddr;
  4638. + preempt_disable();
  4639. pagefault_disable();
  4640. if (!PageHighMem(page))
  4641. return page_address(page);
  4642. @@ -79,6 +80,7 @@
  4643. }
  4644. pagefault_enable();
  4645. + preempt_enable();
  4646. }
  4647. EXPORT_SYMBOL(__kunmap_atomic);
  4648. diff -Nur linux-4.1.39.orig/block/blk-core.c linux-4.1.39/block/blk-core.c
  4649. --- linux-4.1.39.orig/block/blk-core.c 2017-03-13 21:04:36.000000000 +0100
  4650. +++ linux-4.1.39/block/blk-core.c 2017-04-18 17:56:30.573395579 +0200
  4651. @@ -100,6 +100,9 @@
  4652. INIT_LIST_HEAD(&rq->queuelist);
  4653. INIT_LIST_HEAD(&rq->timeout_list);
  4654. +#ifdef CONFIG_PREEMPT_RT_FULL
  4655. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  4656. +#endif
  4657. rq->cpu = -1;
  4658. rq->q = q;
  4659. rq->__sector = (sector_t) -1;
  4660. @@ -194,7 +197,7 @@
  4661. **/
  4662. void blk_start_queue(struct request_queue *q)
  4663. {
  4664. - WARN_ON(!irqs_disabled());
  4665. + WARN_ON_NONRT(!irqs_disabled());
  4666. queue_flag_clear(QUEUE_FLAG_STOPPED, q);
  4667. __blk_run_queue(q);
  4668. @@ -663,7 +666,7 @@
  4669. q->bypass_depth = 1;
  4670. __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
  4671. - init_waitqueue_head(&q->mq_freeze_wq);
  4672. + init_swait_head(&q->mq_freeze_wq);
  4673. if (blkcg_init_queue(q))
  4674. goto fail_bdi;
  4675. @@ -3079,7 +3082,7 @@
  4676. blk_run_queue_async(q);
  4677. else
  4678. __blk_run_queue(q);
  4679. - spin_unlock(q->queue_lock);
  4680. + spin_unlock_irq(q->queue_lock);
  4681. }
  4682. static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
  4683. @@ -3127,7 +3130,6 @@
  4684. void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  4685. {
  4686. struct request_queue *q;
  4687. - unsigned long flags;
  4688. struct request *rq;
  4689. LIST_HEAD(list);
  4690. unsigned int depth;
  4691. @@ -3147,11 +3149,6 @@
  4692. q = NULL;
  4693. depth = 0;
  4694. - /*
  4695. - * Save and disable interrupts here, to avoid doing it for every
  4696. - * queue lock we have to take.
  4697. - */
  4698. - local_irq_save(flags);
  4699. while (!list_empty(&list)) {
  4700. rq = list_entry_rq(list.next);
  4701. list_del_init(&rq->queuelist);
  4702. @@ -3164,7 +3161,7 @@
  4703. queue_unplugged(q, depth, from_schedule);
  4704. q = rq->q;
  4705. depth = 0;
  4706. - spin_lock(q->queue_lock);
  4707. + spin_lock_irq(q->queue_lock);
  4708. }
  4709. /*
  4710. @@ -3191,8 +3188,6 @@
  4711. */
  4712. if (q)
  4713. queue_unplugged(q, depth, from_schedule);
  4714. -
  4715. - local_irq_restore(flags);
  4716. }
  4717. void blk_finish_plug(struct blk_plug *plug)
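
blk_flush_plug_list() used to disable interrupts once around the whole request list and take each queue lock with plain spin_lock(). The hunk above drops the global local_irq_save() and uses spin_lock_irq()/spin_unlock_irq() per queue, so on RT the walk stays preemptible between queues. The resulting shape, sketched with hypothetical types:

#include <linux/list.h>
#include <linux/spinlock.h>

struct q { spinlock_t lock; };
struct req { struct list_head node; struct q *q; };

static void dispatch(struct req *r) { /* hand the request to its queue */ }

static void flush_list(struct list_head *reqs)
{
        struct req *r, *tmp;

        list_for_each_entry_safe(r, tmp, reqs, node) {
                spin_lock_irq(&r->q->lock);     /* IRQs off only per queue */
                list_del_init(&r->node);
                dispatch(r);
                spin_unlock_irq(&r->q->lock);
        }
}
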
  4718. diff -Nur linux-4.1.39.orig/block/blk-ioc.c linux-4.1.39/block/blk-ioc.c
  4719. --- linux-4.1.39.orig/block/blk-ioc.c 2017-03-13 21:04:36.000000000 +0100
  4720. +++ linux-4.1.39/block/blk-ioc.c 2017-04-18 17:56:30.573395579 +0200
  4721. @@ -7,6 +7,7 @@
  4722. #include <linux/bio.h>
  4723. #include <linux/blkdev.h>
  4724. #include <linux/slab.h>
  4725. +#include <linux/delay.h>
  4726. #include "blk.h"
  4727. @@ -109,7 +110,7 @@
  4728. spin_unlock(q->queue_lock);
  4729. } else {
  4730. spin_unlock_irqrestore(&ioc->lock, flags);
  4731. - cpu_relax();
  4732. + cpu_chill();
  4733. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  4734. }
  4735. }
  4736. @@ -187,7 +188,7 @@
  4737. spin_unlock(icq->q->queue_lock);
  4738. } else {
  4739. spin_unlock_irqrestore(&ioc->lock, flags);
  4740. - cpu_relax();
  4741. + cpu_chill();
  4742. goto retry;
  4743. }
  4744. }
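
The retry loops in blk-ioc.c spin until a nested trylock succeeds. On RT the lock owner may itself be preempted, so busy-waiting with cpu_relax() could live-lock; cpu_chill() (declared via the linux/delay.h include added above) sleeps briefly instead. The pattern, sketched:

#include <linux/spinlock.h>
#include <linux/delay.h>

/* Take 'outer', then try 'inner'; on contention drop 'outer', back off and
 * retry.  cpu_chill() is a short sleep on PREEMPT_RT, cpu_relax() elsewhere. */
static void lock_both(spinlock_t *outer, spinlock_t *inner)
{
retry:
        spin_lock(outer);
        if (!spin_trylock(inner)) {
                spin_unlock(outer);
                cpu_chill();
                goto retry;
        }
}
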
  4745. diff -Nur linux-4.1.39.orig/block/blk-iopoll.c linux-4.1.39/block/blk-iopoll.c
  4746. --- linux-4.1.39.orig/block/blk-iopoll.c 2017-03-13 21:04:36.000000000 +0100
  4747. +++ linux-4.1.39/block/blk-iopoll.c 2017-04-18 17:56:30.573395579 +0200
  4748. @@ -35,6 +35,7 @@
  4749. list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
  4750. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  4751. local_irq_restore(flags);
  4752. + preempt_check_resched_rt();
  4753. }
  4754. EXPORT_SYMBOL(blk_iopoll_sched);
  4755. @@ -132,6 +133,7 @@
  4756. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  4757. local_irq_enable();
  4758. + preempt_check_resched_rt();
  4759. }
  4760. /**
  4761. @@ -201,6 +203,7 @@
  4762. this_cpu_ptr(&blk_cpu_iopoll));
  4763. __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
  4764. local_irq_enable();
  4765. + preempt_check_resched_rt();
  4766. }
  4767. return NOTIFY_OK;
  4768. diff -Nur linux-4.1.39.orig/block/blk-mq.c linux-4.1.39/block/blk-mq.c
  4769. --- linux-4.1.39.orig/block/blk-mq.c 2017-03-13 21:04:36.000000000 +0100
  4770. +++ linux-4.1.39/block/blk-mq.c 2017-04-18 17:56:30.573395579 +0200
  4771. @@ -88,7 +88,7 @@
  4772. if (!(gfp & __GFP_WAIT))
  4773. return -EBUSY;
  4774. - ret = wait_event_interruptible(q->mq_freeze_wq,
  4775. + ret = swait_event_interruptible(q->mq_freeze_wq,
  4776. !q->mq_freeze_depth || blk_queue_dying(q));
  4777. if (blk_queue_dying(q))
  4778. return -ENODEV;
  4779. @@ -107,7 +107,7 @@
  4780. struct request_queue *q =
  4781. container_of(ref, struct request_queue, mq_usage_counter);
  4782. - wake_up_all(&q->mq_freeze_wq);
  4783. + swait_wake_all(&q->mq_freeze_wq);
  4784. }
  4785. void blk_mq_freeze_queue_start(struct request_queue *q)
  4786. @@ -127,7 +127,7 @@
  4787. static void blk_mq_freeze_queue_wait(struct request_queue *q)
  4788. {
  4789. - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
  4790. + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
  4791. }
  4792. /*
  4793. @@ -151,7 +151,7 @@
  4794. spin_unlock_irq(q->queue_lock);
  4795. if (wake) {
  4796. percpu_ref_reinit(&q->mq_usage_counter);
  4797. - wake_up_all(&q->mq_freeze_wq);
  4798. + swait_wake_all(&q->mq_freeze_wq);
  4799. }
  4800. }
  4801. EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  4802. @@ -170,7 +170,7 @@
  4803. * dying, we need to ensure that processes currently waiting on
  4804. * the queue are notified as well.
  4805. */
  4806. - wake_up_all(&q->mq_freeze_wq);
  4807. + swait_wake_all(&q->mq_freeze_wq);
  4808. }
  4809. bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  4810. @@ -217,6 +217,9 @@
  4811. rq->resid_len = 0;
  4812. rq->sense = NULL;
  4813. +#ifdef CONFIG_PREEMPT_RT_FULL
  4814. + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
  4815. +#endif
  4816. INIT_LIST_HEAD(&rq->timeout_list);
  4817. rq->timeout = 0;
  4818. @@ -346,6 +349,17 @@
  4819. }
  4820. EXPORT_SYMBOL(blk_mq_end_request);
  4821. +#ifdef CONFIG_PREEMPT_RT_FULL
  4822. +
  4823. +void __blk_mq_complete_request_remote_work(struct work_struct *work)
  4824. +{
  4825. + struct request *rq = container_of(work, struct request, work);
  4826. +
  4827. + rq->q->softirq_done_fn(rq);
  4828. +}
  4829. +
  4830. +#else
  4831. +
  4832. static void __blk_mq_complete_request_remote(void *data)
  4833. {
  4834. struct request *rq = data;
  4835. @@ -353,6 +367,8 @@
  4836. rq->q->softirq_done_fn(rq);
  4837. }
  4838. +#endif
  4839. +
  4840. static void blk_mq_ipi_complete_request(struct request *rq)
  4841. {
  4842. struct blk_mq_ctx *ctx = rq->mq_ctx;
  4843. @@ -364,19 +380,23 @@
  4844. return;
  4845. }
  4846. - cpu = get_cpu();
  4847. + cpu = get_cpu_light();
  4848. if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  4849. shared = cpus_share_cache(cpu, ctx->cpu);
  4850. if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
  4851. +#ifdef CONFIG_PREEMPT_RT_FULL
  4852. + schedule_work_on(ctx->cpu, &rq->work);
  4853. +#else
  4854. rq->csd.func = __blk_mq_complete_request_remote;
  4855. rq->csd.info = rq;
  4856. rq->csd.flags = 0;
  4857. smp_call_function_single_async(ctx->cpu, &rq->csd);
  4858. +#endif
  4859. } else {
  4860. rq->q->softirq_done_fn(rq);
  4861. }
  4862. - put_cpu();
  4863. + put_cpu_light();
  4864. }
  4865. void __blk_mq_complete_request(struct request *rq)
  4866. @@ -905,14 +925,14 @@
  4867. return;
  4868. if (!async) {
  4869. - int cpu = get_cpu();
  4870. + int cpu = get_cpu_light();
  4871. if (cpumask_test_cpu(cpu, hctx->cpumask)) {
  4872. __blk_mq_run_hw_queue(hctx);
  4873. - put_cpu();
  4874. + put_cpu_light();
  4875. return;
  4876. }
  4877. - put_cpu();
  4878. + put_cpu_light();
  4879. }
  4880. kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
  4881. @@ -1589,7 +1609,7 @@
  4882. {
  4883. struct blk_mq_hw_ctx *hctx = data;
  4884. - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
  4885. + if (action == CPU_POST_DEAD)
  4886. return blk_mq_hctx_cpu_offline(hctx, cpu);
  4887. /*
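The PREEMPT_RT_FULL branch above completes a request by queueing a per-request work item on the target CPU instead of sending an IPI. Below is a minimal sketch of that deferral pattern; the my_req structure and the completion callback are stand-ins for struct request and softirq_done_fn, not the patch's code.

    #include <linux/workqueue.h>
    #include <linux/smp.h>
    #include <linux/kernel.h>

    struct my_req {
        int tag;
        struct work_struct work;        /* queued instead of sending an IPI */
    };

    /* Runs later, in schedulable context, on the CPU it was queued to. */
    static void my_complete_remote_work(struct work_struct *work)
    {
        struct my_req *rq = container_of(work, struct my_req, work);

        pr_info("completing request %d on CPU %d\n", rq->tag,
                raw_smp_processor_id());
    }

    static void my_req_init(struct my_req *rq, int tag)
    {
        rq->tag = tag;
        INIT_WORK(&rq->work, my_complete_remote_work);
    }

    static void my_complete_on(struct my_req *rq, int target_cpu)
    {
        /* Replaces smp_call_function_single_async() on PREEMPT_RT_FULL. */
        schedule_work_on(target_cpu, &rq->work);
    }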
  4888. diff -Nur linux-4.1.39.orig/block/blk-mq-cpu.c linux-4.1.39/block/blk-mq-cpu.c
  4889. --- linux-4.1.39.orig/block/blk-mq-cpu.c 2017-03-13 21:04:36.000000000 +0100
  4890. +++ linux-4.1.39/block/blk-mq-cpu.c 2017-04-18 17:56:30.573395579 +0200
  4891. @@ -16,7 +16,7 @@
  4892. #include "blk-mq.h"
  4893. static LIST_HEAD(blk_mq_cpu_notify_list);
  4894. -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
  4895. +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
  4896. static int blk_mq_main_cpu_notify(struct notifier_block *self,
  4897. unsigned long action, void *hcpu)
  4898. @@ -25,7 +25,10 @@
  4899. struct blk_mq_cpu_notifier *notify;
  4900. int ret = NOTIFY_OK;
  4901. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  4902. + if (action != CPU_POST_DEAD)
  4903. + return NOTIFY_OK;
  4904. +
  4905. + spin_lock(&blk_mq_cpu_notify_lock);
  4906. list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
  4907. ret = notify->notify(notify->data, action, cpu);
  4908. @@ -33,7 +36,7 @@
  4909. break;
  4910. }
  4911. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  4912. + spin_unlock(&blk_mq_cpu_notify_lock);
  4913. return ret;
  4914. }
  4915. @@ -41,16 +44,16 @@
  4916. {
  4917. BUG_ON(!notifier->notify);
  4918. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  4919. + spin_lock(&blk_mq_cpu_notify_lock);
  4920. list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
  4921. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  4922. + spin_unlock(&blk_mq_cpu_notify_lock);
  4923. }
  4924. void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
  4925. {
  4926. - raw_spin_lock(&blk_mq_cpu_notify_lock);
  4927. + spin_lock(&blk_mq_cpu_notify_lock);
  4928. list_del(&notifier->list);
  4929. - raw_spin_unlock(&blk_mq_cpu_notify_lock);
  4930. + spin_unlock(&blk_mq_cpu_notify_lock);
  4931. }
  4932. void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
  4933. diff -Nur linux-4.1.39.orig/block/blk-mq.h linux-4.1.39/block/blk-mq.h
  4934. --- linux-4.1.39.orig/block/blk-mq.h 2017-03-13 21:04:36.000000000 +0100
  4935. +++ linux-4.1.39/block/blk-mq.h 2017-04-18 17:56:30.573395579 +0200
  4936. @@ -76,7 +76,10 @@
  4937. static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
  4938. unsigned int cpu)
  4939. {
  4940. - return per_cpu_ptr(q->queue_ctx, cpu);
  4941. + struct blk_mq_ctx *ctx;
  4942. +
  4943. + ctx = per_cpu_ptr(q->queue_ctx, cpu);
  4944. + return ctx;
  4945. }
  4946. /*
  4947. @@ -87,12 +90,12 @@
  4948. */
  4949. static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
  4950. {
  4951. - return __blk_mq_get_ctx(q, get_cpu());
  4952. + return __blk_mq_get_ctx(q, get_cpu_light());
  4953. }
  4954. static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
  4955. {
  4956. - put_cpu();
  4957. + put_cpu_light();
  4958. }
  4959. struct blk_mq_alloc_data {
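get_cpu_light()/put_cpu_light() are primitives added elsewhere in this patch; on PREEMPT_RT they disable only migration, so the code between them stays on one CPU but remains preemptible, and any per-CPU data it touches must be serialized by its own lock. A hedged sketch of that access pattern, with a made-up per-CPU context mirroring blk_mq_get_ctx():

    #include <linux/percpu.h>
    #include <linux/spinlock.h>

    struct my_pcpu_ctx {
        spinlock_t lock;        /* callers serialize on this, not on preemption */
        unsigned long queued;
    };

    /* spin_lock_init() on each CPU's ctx is assumed to run at init time. */
    static DEFINE_PER_CPU(struct my_pcpu_ctx, my_ctx);

    static struct my_pcpu_ctx *my_get_ctx(void)
    {
        /* Pins the task to the current CPU; still preemptible on RT. */
        return &per_cpu(my_ctx, get_cpu_light());
    }

    static void my_put_ctx(void)
    {
        put_cpu_light();
    }

    static void my_queue_one(void)
    {
        struct my_pcpu_ctx *ctx = my_get_ctx();

        spin_lock(&ctx->lock);
        ctx->queued++;
        spin_unlock(&ctx->lock);
        my_put_ctx();
    }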
  4960. diff -Nur linux-4.1.39.orig/block/blk-softirq.c linux-4.1.39/block/blk-softirq.c
  4961. --- linux-4.1.39.orig/block/blk-softirq.c 2017-03-13 21:04:36.000000000 +0100
  4962. +++ linux-4.1.39/block/blk-softirq.c 2017-04-18 17:56:30.573395579 +0200
  4963. @@ -51,6 +51,7 @@
  4964. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  4965. local_irq_restore(flags);
  4966. + preempt_check_resched_rt();
  4967. }
  4968. /*
  4969. @@ -93,6 +94,7 @@
  4970. this_cpu_ptr(&blk_cpu_done));
  4971. raise_softirq_irqoff(BLOCK_SOFTIRQ);
  4972. local_irq_enable();
  4973. + preempt_check_resched_rt();
  4974. }
  4975. return NOTIFY_OK;
  4976. @@ -150,6 +152,7 @@
  4977. goto do_local;
  4978. local_irq_restore(flags);
  4979. + preempt_check_resched_rt();
  4980. }
  4981. /**
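preempt_check_resched_rt() is another helper introduced by this patch: on PREEMPT_RT, raising a softirq may wake the softirq thread, so once interrupts are re-enabled the raiser should check whether a reschedule is now pending (on non-RT kernels it compiles away). A short sketch of the shape these hunks converge on; the queueing step is only a placeholder:

    #include <linux/interrupt.h>

    static void my_raise_block_softirq(void)
    {
        unsigned long flags;

        local_irq_save(flags);
        /* ... add the completed request to the per-CPU done list here ... */
        raise_softirq_irqoff(BLOCK_SOFTIRQ);
        local_irq_restore(flags);
        /* RT: the softirq runs in a thread; let it preempt us if needed. */
        preempt_check_resched_rt();
    }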
  4982. diff -Nur linux-4.1.39.orig/block/bounce.c linux-4.1.39/block/bounce.c
  4983. --- linux-4.1.39.orig/block/bounce.c 2017-03-13 21:04:36.000000000 +0100
  4984. +++ linux-4.1.39/block/bounce.c 2017-04-18 17:56:30.573395579 +0200
  4985. @@ -54,11 +54,11 @@
  4986. unsigned long flags;
  4987. unsigned char *vto;
  4988. - local_irq_save(flags);
  4989. + local_irq_save_nort(flags);
  4990. vto = kmap_atomic(to->bv_page);
  4991. memcpy(vto + to->bv_offset, vfrom, to->bv_len);
  4992. kunmap_atomic(vto);
  4993. - local_irq_restore(flags);
  4994. + local_irq_restore_nort(flags);
  4995. }
  4996. #else /* CONFIG_HIGHMEM */
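local_irq_save_nort()/local_irq_restore_nort() also come from this patch series: on a non-RT kernel they are ordinary local_irq_save()/local_irq_restore(), while on PREEMPT_RT they skip the hard interrupt disable, since the section only needs to be atomic with respect to the local kmap slot. A minimal sketch of the copy pattern used above, with illustrative parameters:

    #include <linux/highmem.h>
    #include <linux/interrupt.h>
    #include <linux/string.h>

    static void my_copy_to_page(struct page *to, unsigned int offset,
                                const void *from, size_t len)
    {
        unsigned long flags;
        unsigned char *vto;

        local_irq_save_nort(flags);     /* no hard irq-off section on RT */
        vto = kmap_atomic(to);
        memcpy(vto + offset, from, len);
        kunmap_atomic(vto);
        local_irq_restore_nort(flags);
    }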
  4997. diff -Nur linux-4.1.39.orig/crypto/algapi.c linux-4.1.39/crypto/algapi.c
  4998. --- linux-4.1.39.orig/crypto/algapi.c 2017-03-13 21:04:36.000000000 +0100
  4999. +++ linux-4.1.39/crypto/algapi.c 2017-04-18 17:56:30.573395579 +0200
  5000. @@ -696,13 +696,13 @@
  5001. int crypto_register_notifier(struct notifier_block *nb)
  5002. {
  5003. - return blocking_notifier_chain_register(&crypto_chain, nb);
  5004. + return srcu_notifier_chain_register(&crypto_chain, nb);
  5005. }
  5006. EXPORT_SYMBOL_GPL(crypto_register_notifier);
  5007. int crypto_unregister_notifier(struct notifier_block *nb)
  5008. {
  5009. - return blocking_notifier_chain_unregister(&crypto_chain, nb);
  5010. + return srcu_notifier_chain_unregister(&crypto_chain, nb);
  5011. }
  5012. EXPORT_SYMBOL_GPL(crypto_unregister_notifier);
  5013. diff -Nur linux-4.1.39.orig/crypto/api.c linux-4.1.39/crypto/api.c
  5014. --- linux-4.1.39.orig/crypto/api.c 2017-03-13 21:04:36.000000000 +0100
  5015. +++ linux-4.1.39/crypto/api.c 2017-04-18 17:56:30.573395579 +0200
  5016. @@ -31,7 +31,7 @@
  5017. DECLARE_RWSEM(crypto_alg_sem);
  5018. EXPORT_SYMBOL_GPL(crypto_alg_sem);
  5019. -BLOCKING_NOTIFIER_HEAD(crypto_chain);
  5020. +SRCU_NOTIFIER_HEAD(crypto_chain);
  5021. EXPORT_SYMBOL_GPL(crypto_chain);
  5022. static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
  5023. @@ -236,10 +236,10 @@
  5024. {
  5025. int ok;
  5026. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  5027. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  5028. if (ok == NOTIFY_DONE) {
  5029. request_module("cryptomgr");
  5030. - ok = blocking_notifier_call_chain(&crypto_chain, val, v);
  5031. + ok = srcu_notifier_call_chain(&crypto_chain, val, v);
  5032. }
  5033. return ok;
  5034. diff -Nur linux-4.1.39.orig/crypto/internal.h linux-4.1.39/crypto/internal.h
  5035. --- linux-4.1.39.orig/crypto/internal.h 2017-03-13 21:04:36.000000000 +0100
  5036. +++ linux-4.1.39/crypto/internal.h 2017-04-18 17:56:30.573395579 +0200
  5037. @@ -48,7 +48,7 @@
  5038. extern struct list_head crypto_alg_list;
  5039. extern struct rw_semaphore crypto_alg_sem;
  5040. -extern struct blocking_notifier_head crypto_chain;
  5041. +extern struct srcu_notifier_head crypto_chain;
  5042. #ifdef CONFIG_PROC_FS
  5043. void __init crypto_init_proc(void);
  5044. @@ -142,7 +142,7 @@
  5045. static inline void crypto_notify(unsigned long val, void *v)
  5046. {
  5047. - blocking_notifier_call_chain(&crypto_chain, val, v);
  5048. + srcu_notifier_call_chain(&crypto_chain, val, v);
  5049. }
  5050. #endif /* _CRYPTO_INTERNAL_H */
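The crypto hunks convert crypto_chain from a blocking notifier chain to an SRCU notifier chain, so callers of crypto_notify() no longer take an rwsem on the read side. A small, self-contained sketch of the SRCU notifier API as it is used above; the chain, callback, and event value are examples, not the patch's code:

    #include <linux/notifier.h>
    #include <linux/module.h>
    #include <linux/kernel.h>

    SRCU_NOTIFIER_HEAD(my_chain);

    static int my_event_cb(struct notifier_block *nb, unsigned long val, void *data)
    {
        pr_info("got event %lu\n", val);
        return NOTIFY_OK;
    }

    static struct notifier_block my_nb = {
        .notifier_call = my_event_cb,
    };

    static int __init my_init(void)
    {
        srcu_notifier_chain_register(&my_chain, &my_nb);
        srcu_notifier_call_chain(&my_chain, 1, NULL);   /* SRCU read side */
        return 0;
    }

    static void __exit my_exit(void)
    {
        srcu_notifier_chain_unregister(&my_chain, &my_nb);
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");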
  5051. diff -Nur linux-4.1.39.orig/Documentation/hwlat_detector.txt linux-4.1.39/Documentation/hwlat_detector.txt
  5052. --- linux-4.1.39.orig/Documentation/hwlat_detector.txt 1970-01-01 01:00:00.000000000 +0100
  5053. +++ linux-4.1.39/Documentation/hwlat_detector.txt 2017-04-18 17:56:30.545394493 +0200
  5054. @@ -0,0 +1,64 @@
  5055. +Introduction:
  5056. +-------------
  5057. +
  5058. +The module hwlat_detector is a special purpose kernel module that is used to
  5059. +detect large system latencies induced by the behavior of certain underlying
  5060. +hardware or firmware, independent of Linux itself. The code was developed
  5061. +originally to detect SMIs (System Management Interrupts) on x86 systems,
  5062. +however there is nothing x86 specific about this patchset. It was
  5063. +originally written for use by the "RT" patch since the Real Time
  5064. +kernel is highly latency sensitive.
  5065. +
  5066. +SMIs are usually not serviced by the Linux kernel, which typically does not
5067. +even know that they are occurring. SMIs are instead set up by BIOS code
  5068. +and are serviced by BIOS code, usually for "critical" events such as
  5069. +management of thermal sensors and fans. Sometimes though, SMIs are used for
  5070. +other tasks and those tasks can spend an inordinate amount of time in the
  5071. +handler (sometimes measured in milliseconds). Obviously this is a problem if
  5072. +you are trying to keep event service latencies down in the microsecond range.
  5073. +
  5074. +The hardware latency detector works by hogging all of the cpus for configurable
  5075. +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter
  5076. +for some period, then looking for gaps in the TSC data. Any gap indicates a
5077. +time when the polling was interrupted; since the machine is stopped and
5078. +interrupts are turned off, the only thing that could do that would be an SMI.
  5079. +
  5080. +Note that the SMI detector should *NEVER* be used in a production environment.
  5081. +It is intended to be run manually to determine if the hardware platform has a
  5082. +problem with long system firmware service routines.
  5083. +
  5084. +Usage:
  5085. +------
  5086. +
5087. +Loading the module hwlat_detector with the parameter "enabled=1" (or
5088. +toggling on the "enable" entry in the "hwlat_detector" debugfs directory) is the only
  5089. +step required to start the hwlat_detector. It is possible to redefine the
  5090. +threshold in microseconds (us) above which latency spikes will be taken
  5091. +into account (parameter "threshold=").
  5092. +
  5093. +Example:
  5094. +
  5095. + # modprobe hwlat_detector enabled=1 threshold=100
  5096. +
  5097. +After the module is loaded, it creates a directory named "hwlat_detector" under
  5098. +the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary
  5099. +to have debugfs mounted, which might be on /sys/debug on your system.
  5100. +
  5101. +The /debug/hwlat_detector interface contains the following files:
  5102. +
  5103. +count - number of latency spikes observed since last reset
  5104. +enable - a global enable/disable toggle (0/1), resets count
  5105. +max - maximum hardware latency actually observed (usecs)
  5106. +sample - a pipe from which to read current raw sample data
  5107. + in the format <timestamp> <latency observed usecs>
  5108. + (can be opened O_NONBLOCK for a single sample)
  5109. +threshold - minimum latency value to be considered (usecs)
  5110. +width - time period to sample with CPUs held (usecs)
  5111. + must be less than the total window size (enforced)
  5112. +window - total period of sampling, width being inside (usecs)
  5113. +
  5114. +By default we will set width to 500,000 and window to 1,000,000, meaning that
  5115. +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we
  5116. +observe any latencies that exceed the threshold (initially 100 usecs),
  5117. +then we write to a global sample ring buffer of 8K samples, which is
  5118. +consumed by reading from the "sample" (pipe) debugfs file interface.
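For completeness, here is a small user-space reader for the "sample" pipe described above. It is only a sketch; it assumes debugfs is mounted at /sys/kernel/debug, so adjust the path if your system uses /debug as in the text:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/sys/kernel/debug/hwlat_detector/sample", "r");
        char line[128];

        if (!f) {
            perror("open sample");
            return 1;
        }
        /* Each line is "<timestamp> <latency in usecs>"; blocks until data arrive. */
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);
        fclose(f);
        return 0;
    }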
  5119. diff -Nur linux-4.1.39.orig/Documentation/sysrq.txt linux-4.1.39/Documentation/sysrq.txt
  5120. --- linux-4.1.39.orig/Documentation/sysrq.txt 2017-03-13 21:04:36.000000000 +0100
  5121. +++ linux-4.1.39/Documentation/sysrq.txt 2017-04-18 17:56:30.545394493 +0200
  5122. @@ -59,10 +59,17 @@
  5123. On other - If you know of the key combos for other architectures, please
  5124. let me know so I can add them to this section.
  5125. -On all - write a character to /proc/sysrq-trigger. e.g.:
  5126. -
  5127. +On all - write a character to /proc/sysrq-trigger, e.g.:
  5128. echo t > /proc/sysrq-trigger
  5129. +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g.
  5130. + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq
  5131. + Send an ICMP echo request with this pattern plus the particular
  5132. + SysRq command key. Example:
  5133. + # ping -c1 -s57 -p0102030468
  5134. + will trigger the SysRq-H (help) command.
  5135. +
  5136. +
  5137. * What are the 'command' keys?
  5138. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  5139. 'b' - Will immediately reboot the system without syncing or unmounting
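The -p pattern in the ping example above is just the four cookie bytes followed by the ASCII code of the SysRq command key ('h' is 0x68, hence 0102030468). A tiny user-space helper that builds the pattern string for an arbitrary key, assuming the same 0x01020304 cookie:

    #include <stdio.h>

    int main(int argc, char **argv)
    {
        unsigned int cookie = 0x01020304;
        unsigned char key = argc > 1 ? argv[1][0] : 'h';

        /* Prints the payload for "ping -p": cookie bytes, then the key byte. */
        printf("%08x%02x\n", cookie, key);
        return 0;
    }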
  5140. diff -Nur linux-4.1.39.orig/Documentation/trace/histograms.txt linux-4.1.39/Documentation/trace/histograms.txt
  5141. --- linux-4.1.39.orig/Documentation/trace/histograms.txt 1970-01-01 01:00:00.000000000 +0100
  5142. +++ linux-4.1.39/Documentation/trace/histograms.txt 2017-04-18 17:56:30.545394493 +0200
  5143. @@ -0,0 +1,186 @@
  5144. + Using the Linux Kernel Latency Histograms
  5145. +
  5146. +
5147. +This document gives a short explanation of how to enable, configure and use
  5148. +latency histograms. Latency histograms are primarily relevant in the
  5149. +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT)
  5150. +and are used in the quality management of the Linux real-time
  5151. +capabilities.
  5152. +
  5153. +
  5154. +* Purpose of latency histograms
  5155. +
  5156. +A latency histogram continuously accumulates the frequencies of latency
5157. +data. There are two types of histograms:
  5158. +- potential sources of latencies
  5159. +- effective latencies
  5160. +
  5161. +
  5162. +* Potential sources of latencies
  5163. +
  5164. +Potential sources of latencies are code segments where interrupts,
  5165. +preemption or both are disabled (aka critical sections). To create
  5166. +histograms of potential sources of latency, the kernel stores the time
  5167. +stamp at the start of a critical section, determines the time elapsed
  5168. +when the end of the section is reached, and increments the frequency
  5169. +counter of that latency value - irrespective of whether any concurrently
  5170. +running process is affected by latency or not.
  5171. +- Configuration items (in the Kernel hacking/Tracers submenu)
  5172. + CONFIG_INTERRUPT_OFF_LATENCY
  5173. + CONFIG_PREEMPT_OFF_LATENCY
  5174. +
  5175. +
  5176. +* Effective latencies
  5177. +
5178. +Effective latencies are those that actually occur during the wakeup of a process. To
  5179. +determine effective latencies, the kernel stores the time stamp when a
  5180. +process is scheduled to be woken up, and determines the duration of the
  5181. +wakeup time shortly before control is passed over to this process. Note
  5182. +that the apparent latency in user space may be somewhat longer, since the
  5183. +process may be interrupted after control is passed over to it but before
  5184. +the execution in user space takes place. Simply measuring the interval
5185. +between enqueuing and wakeup may also not be appropriate in cases when a
  5186. +process is scheduled as a result of a timer expiration. The timer may have
  5187. +missed its deadline, e.g. due to disabled interrupts, but this latency
  5188. +would not be registered. Therefore, the offsets of missed timers are
  5189. +recorded in a separate histogram. If both wakeup latency and missed timer
  5190. +offsets are configured and enabled, a third histogram may be enabled that
  5191. +records the overall latency as a sum of the timer latency, if any, and the
  5192. +wakeup latency. This histogram is called "timerandwakeup".
  5193. +- Configuration items (in the Kernel hacking/Tracers submenu)
  5194. + CONFIG_WAKEUP_LATENCY
  5195. + CONFIG_MISSED_TIMER_OFSETS
  5196. +
  5197. +
  5198. +* Usage
  5199. +
  5200. +The interface to the administration of the latency histograms is located
  5201. +in the debugfs file system. To mount it, either enter
  5202. +
  5203. +mount -t sysfs nodev /sys
  5204. +mount -t debugfs nodev /sys/kernel/debug
  5205. +
  5206. +from shell command line level, or add
  5207. +
  5208. +nodev /sys sysfs defaults 0 0
  5209. +nodev /sys/kernel/debug debugfs defaults 0 0
  5210. +
  5211. +to the file /etc/fstab. All latency histogram related files are then
  5212. +available in the directory /sys/kernel/debug/tracing/latency_hist. A
  5213. +particular histogram type is enabled by writing non-zero to the related
  5214. +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory.
  5215. +Select "preemptirqsoff" for the histograms of potential sources of
  5216. +latencies and "wakeup" for histograms of effective latencies etc. The
  5217. +histogram data - one per CPU - are available in the files
  5218. +
  5219. +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx
  5220. +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx
  5221. +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx
  5222. +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx
  5223. +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx
  5224. +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx
  5225. +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx
  5226. +
  5227. +The histograms are reset by writing non-zero to the file "reset" in a
  5228. +particular latency directory. To reset all latency data, use
  5229. +
  5230. +#!/bin/sh
  5231. +
  5232. +TRACINGDIR=/sys/kernel/debug/tracing
  5233. +HISTDIR=$TRACINGDIR/latency_hist
  5234. +
  5235. +if test -d $HISTDIR
  5236. +then
  5237. + cd $HISTDIR
  5238. + for i in `find . | grep /reset$`
  5239. + do
  5240. + echo 1 >$i
  5241. + done
  5242. +fi
  5243. +
  5244. +
  5245. +* Data format
  5246. +
  5247. +Latency data are stored with a resolution of one microsecond. The
5248. +maximum latency is 10,240 microseconds. The data are only valid if the
5249. +overflow register is empty. Every output line contains the latency in
5250. +microseconds in the first field and the number of samples in the second
5251. +field. To display only lines with a positive latency count, use, for
  5252. +example,
  5253. +
  5254. +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0
  5255. +
  5256. +#Minimum latency: 0 microseconds.
  5257. +#Average latency: 0 microseconds.
  5258. +#Maximum latency: 25 microseconds.
  5259. +#Total samples: 3104770694
  5260. +#There are 0 samples greater or equal than 10240 microseconds
  5261. +#usecs samples
  5262. + 0 2984486876
  5263. + 1 49843506
  5264. + 2 58219047
  5265. + 3 5348126
  5266. + 4 2187960
  5267. + 5 3388262
  5268. + 6 959289
  5269. + 7 208294
  5270. + 8 40420
  5271. + 9 4485
  5272. + 10 14918
  5273. + 11 18340
  5274. + 12 25052
  5275. + 13 19455
  5276. + 14 5602
  5277. + 15 969
  5278. + 16 47
  5279. + 17 18
  5280. + 18 14
  5281. + 19 1
  5282. + 20 3
  5283. + 21 2
  5284. + 22 5
  5285. + 23 2
  5286. + 25 1
  5287. +
  5288. +
  5289. +* Wakeup latency of a selected process
  5290. +
  5291. +To only collect wakeup latency data of a particular process, write the
  5292. +PID of the requested process to
  5293. +
  5294. +/sys/kernel/debug/tracing/latency_hist/wakeup/pid
  5295. +
5296. +PIDs are not considered if this variable is set to 0.
  5297. +
  5298. +
  5299. +* Details of the process with the highest wakeup latency so far
  5300. +
  5301. +Selected data of the process that suffered from the highest wakeup
  5302. +latency that occurred in a particular CPU are available in the file
  5303. +
  5304. +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx.
  5305. +
  5306. +In addition, other relevant system data at the time when the
  5307. +latency occurred are given.
  5308. +
  5309. +The format of the data is (all in one line):
  5310. +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \
  5311. +<- <PID> <Priority> <Command> <Timestamp>
  5312. +
  5313. +The value of <Timeroffset> is only relevant in the combined timer
  5314. +and wakeup latency recording. In the wakeup recording, it is
  5315. +always 0, in the missed_timer_offsets recording, it is the same
  5316. +as <Latency>.
  5317. +
  5318. +When retrospectively searching for the origin of a latency and
  5319. +tracing was not enabled, it may be helpful to know the name and
5320. +some basic data of the task that (finally) switched to the
5321. +late real-time task. In addition to the victim's data, the
5322. +data of the possible culprit are therefore displayed after the
  5323. +"<-" symbol.
  5324. +
  5325. +Finally, the timestamp of the time when the latency occurred
  5326. +in <seconds>.<microseconds> after the most recent system boot
  5327. +is provided.
  5328. +
  5329. +These data are also reset when the wakeup histogram is reset.
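A small user-space sketch that parses one of the per-CPU histogram files in the format described above (summary lines start with '#', data lines are "<usecs> <samples>"); the preemptoff/CPU0 path is just an example:

    #include <stdio.h>

    int main(void)
    {
        const char *path =
            "/sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0";
        FILE *f = fopen(path, "r");
        char line[256];
        unsigned long usecs, samples;

        if (!f) {
            perror(path);
            return 1;
        }
        while (fgets(line, sizeof(line), f)) {
            if (line[0] == '#')
                continue;       /* skip the summary header */
            if (sscanf(line, "%lu %lu", &usecs, &samples) == 2 && samples)
                printf("%6lu us: %lu samples\n", usecs, samples);
        }
        fclose(f);
        return 0;
    }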
  5330. diff -Nur linux-4.1.39.orig/drivers/acpi/acpica/acglobal.h linux-4.1.39/drivers/acpi/acpica/acglobal.h
  5331. --- linux-4.1.39.orig/drivers/acpi/acpica/acglobal.h 2017-03-13 21:04:36.000000000 +0100
  5332. +++ linux-4.1.39/drivers/acpi/acpica/acglobal.h 2017-04-18 17:56:30.573395579 +0200
  5333. @@ -112,7 +112,7 @@
  5334. * interrupt level
  5335. */
  5336. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
  5337. -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  5338. +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
  5339. ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
  5340. /* Mutex for _OSI support */
  5341. diff -Nur linux-4.1.39.orig/drivers/acpi/acpica/hwregs.c linux-4.1.39/drivers/acpi/acpica/hwregs.c
  5342. --- linux-4.1.39.orig/drivers/acpi/acpica/hwregs.c 2017-03-13 21:04:36.000000000 +0100
  5343. +++ linux-4.1.39/drivers/acpi/acpica/hwregs.c 2017-04-18 17:56:30.573395579 +0200
  5344. @@ -269,14 +269,14 @@
  5345. ACPI_BITMASK_ALL_FIXED_STATUS,
  5346. ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
  5347. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  5348. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  5349. /* Clear the fixed events in PM1 A/B */
  5350. status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
  5351. ACPI_BITMASK_ALL_FIXED_STATUS);
  5352. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  5353. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  5354. if (ACPI_FAILURE(status)) {
  5355. goto exit;
  5356. diff -Nur linux-4.1.39.orig/drivers/acpi/acpica/hwxface.c linux-4.1.39/drivers/acpi/acpica/hwxface.c
  5357. --- linux-4.1.39.orig/drivers/acpi/acpica/hwxface.c 2017-03-13 21:04:36.000000000 +0100
  5358. +++ linux-4.1.39/drivers/acpi/acpica/hwxface.c 2017-04-18 17:56:30.577395735 +0200
  5359. @@ -374,7 +374,7 @@
  5360. return_ACPI_STATUS(AE_BAD_PARAMETER);
  5361. }
  5362. - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
  5363. + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);
  5364. /*
  5365. * At this point, we know that the parent register is one of the
  5366. @@ -435,7 +435,7 @@
  5367. unlock_and_exit:
  5368. - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
  5369. + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
  5370. return_ACPI_STATUS(status);
  5371. }
  5372. diff -Nur linux-4.1.39.orig/drivers/acpi/acpica/utmutex.c linux-4.1.39/drivers/acpi/acpica/utmutex.c
  5373. --- linux-4.1.39.orig/drivers/acpi/acpica/utmutex.c 2017-03-13 21:04:36.000000000 +0100
  5374. +++ linux-4.1.39/drivers/acpi/acpica/utmutex.c 2017-04-18 17:56:30.577395735 +0200
  5375. @@ -88,7 +88,7 @@
  5376. return_ACPI_STATUS (status);
  5377. }
  5378. - status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
  5379. + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
  5380. if (ACPI_FAILURE (status)) {
  5381. return_ACPI_STATUS (status);
  5382. }
  5383. @@ -141,7 +141,7 @@
  5384. /* Delete the spinlocks */
  5385. acpi_os_delete_lock(acpi_gbl_gpe_lock);
  5386. - acpi_os_delete_lock(acpi_gbl_hardware_lock);
  5387. + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
  5388. acpi_os_delete_lock(acpi_gbl_reference_count_lock);
  5389. /* Delete the reader/writer lock */
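These ACPI hunks turn acpi_gbl_hardware_lock into a raw spinlock, because it is taken in contexts that must not sleep even on PREEMPT_RT; acpi_os_create_raw_lock()/acpi_os_delete_raw_lock() are wrappers added elsewhere in this patch. A generic sketch of the raw-spinlock pattern the register accessors end up using, with a made-up register write:

    #include <linux/spinlock.h>
    #include <linux/io.h>

    static DEFINE_RAW_SPINLOCK(my_hw_lock);

    /* Raw spinlocks keep spinning with interrupts off even on RT. */
    static void my_hw_write(void __iomem *reg, u32 val)
    {
        unsigned long flags;

        raw_spin_lock_irqsave(&my_hw_lock, flags);
        writel(val, reg);
        raw_spin_unlock_irqrestore(&my_hw_lock, flags);
    }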
  5390. diff -Nur linux-4.1.39.orig/drivers/ata/libata-sff.c linux-4.1.39/drivers/ata/libata-sff.c
  5391. --- linux-4.1.39.orig/drivers/ata/libata-sff.c 2017-03-13 21:04:36.000000000 +0100
  5392. +++ linux-4.1.39/drivers/ata/libata-sff.c 2017-04-18 17:56:30.577395735 +0200
  5393. @@ -678,9 +678,9 @@
  5394. unsigned long flags;
  5395. unsigned int consumed;
  5396. - local_irq_save(flags);
  5397. + local_irq_save_nort(flags);
  5398. consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
  5399. - local_irq_restore(flags);
  5400. + local_irq_restore_nort(flags);
  5401. return consumed;
  5402. }
  5403. @@ -719,7 +719,7 @@
  5404. unsigned long flags;
  5405. /* FIXME: use a bounce buffer */
  5406. - local_irq_save(flags);
  5407. + local_irq_save_nort(flags);
  5408. buf = kmap_atomic(page);
  5409. /* do the actual data transfer */
  5410. @@ -727,7 +727,7 @@
  5411. do_write);
  5412. kunmap_atomic(buf);
  5413. - local_irq_restore(flags);
  5414. + local_irq_restore_nort(flags);
  5415. } else {
  5416. buf = page_address(page);
  5417. ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
  5418. @@ -864,7 +864,7 @@
  5419. unsigned long flags;
  5420. /* FIXME: use bounce buffer */
  5421. - local_irq_save(flags);
  5422. + local_irq_save_nort(flags);
  5423. buf = kmap_atomic(page);
  5424. /* do the actual data transfer */
  5425. @@ -872,7 +872,7 @@
  5426. count, rw);
  5427. kunmap_atomic(buf);
  5428. - local_irq_restore(flags);
  5429. + local_irq_restore_nort(flags);
  5430. } else {
  5431. buf = page_address(page);
  5432. consumed = ap->ops->sff_data_xfer(dev, buf + offset,
  5433. diff -Nur linux-4.1.39.orig/drivers/block/zram/zram_drv.c linux-4.1.39/drivers/block/zram/zram_drv.c
  5434. --- linux-4.1.39.orig/drivers/block/zram/zram_drv.c 2017-03-13 21:04:36.000000000 +0100
  5435. +++ linux-4.1.39/drivers/block/zram/zram_drv.c 2017-04-18 17:56:30.577395735 +0200
  5436. @@ -386,6 +386,8 @@
  5437. goto out_error;
  5438. }
  5439. + zram_meta_init_table_locks(meta, disksize);
  5440. +
  5441. return meta;
  5442. out_error:
  5443. @@ -484,12 +486,12 @@
  5444. unsigned long handle;
  5445. size_t size;
  5446. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5447. + zram_lock_table(&meta->table[index]);
  5448. handle = meta->table[index].handle;
  5449. size = zram_get_obj_size(meta, index);
  5450. if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
  5451. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5452. + zram_unlock_table(&meta->table[index]);
  5453. clear_page(mem);
  5454. return 0;
  5455. }
  5456. @@ -500,7 +502,7 @@
  5457. else
  5458. ret = zcomp_decompress(zram->comp, cmem, size, mem);
  5459. zs_unmap_object(meta->mem_pool, handle);
  5460. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5461. + zram_unlock_table(&meta->table[index]);
  5462. /* Should NEVER happen. Return bio error if it does. */
  5463. if (unlikely(ret)) {
  5464. @@ -520,14 +522,14 @@
  5465. struct zram_meta *meta = zram->meta;
  5466. page = bvec->bv_page;
  5467. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5468. + zram_lock_table(&meta->table[index]);
  5469. if (unlikely(!meta->table[index].handle) ||
  5470. zram_test_flag(meta, index, ZRAM_ZERO)) {
  5471. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5472. + zram_unlock_table(&meta->table[index]);
  5473. handle_zero_page(bvec);
  5474. return 0;
  5475. }
  5476. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5477. + zram_unlock_table(&meta->table[index]);
  5478. if (is_partial_io(bvec))
  5479. /* Use a temporary buffer to decompress the page */
  5480. @@ -622,10 +624,10 @@
  5481. if (user_mem)
  5482. kunmap_atomic(user_mem);
  5483. /* Free memory associated with this sector now. */
  5484. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5485. + zram_lock_table(&meta->table[index]);
  5486. zram_free_page(zram, index);
  5487. zram_set_flag(meta, index, ZRAM_ZERO);
  5488. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5489. + zram_unlock_table(&meta->table[index]);
  5490. atomic64_inc(&zram->stats.zero_pages);
  5491. ret = 0;
  5492. @@ -685,12 +687,12 @@
  5493. * Free memory associated with this sector
  5494. * before overwriting unused sectors.
  5495. */
  5496. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5497. + zram_lock_table(&meta->table[index]);
  5498. zram_free_page(zram, index);
  5499. meta->table[index].handle = handle;
  5500. zram_set_obj_size(meta, index, clen);
  5501. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5502. + zram_unlock_table(&meta->table[index]);
  5503. /* Update stats */
  5504. atomic64_add(clen, &zram->stats.compr_data_size);
  5505. @@ -762,9 +764,9 @@
  5506. }
  5507. while (n >= PAGE_SIZE) {
  5508. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5509. + zram_lock_table(&meta->table[index]);
  5510. zram_free_page(zram, index);
  5511. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5512. + zram_unlock_table(&meta->table[index]);
  5513. atomic64_inc(&zram->stats.notify_free);
  5514. index++;
  5515. n -= PAGE_SIZE;
  5516. @@ -1007,9 +1009,9 @@
  5517. zram = bdev->bd_disk->private_data;
  5518. meta = zram->meta;
  5519. - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
  5520. + zram_lock_table(&meta->table[index]);
  5521. zram_free_page(zram, index);
  5522. - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
  5523. + zram_unlock_table(&meta->table[index]);
  5524. atomic64_inc(&zram->stats.notify_free);
  5525. }
  5526. diff -Nur linux-4.1.39.orig/drivers/block/zram/zram_drv.h linux-4.1.39/drivers/block/zram/zram_drv.h
  5527. --- linux-4.1.39.orig/drivers/block/zram/zram_drv.h 2017-03-13 21:04:36.000000000 +0100
  5528. +++ linux-4.1.39/drivers/block/zram/zram_drv.h 2017-04-18 17:56:30.577395735 +0200
  5529. @@ -78,6 +78,9 @@
  5530. struct zram_table_entry {
  5531. unsigned long handle;
  5532. unsigned long value;
  5533. +#ifdef CONFIG_PREEMPT_RT_BASE
  5534. + spinlock_t lock;
  5535. +#endif
  5536. };
  5537. struct zram_stats {
  5538. @@ -122,4 +125,42 @@
  5539. u64 disksize; /* bytes */
  5540. char compressor[10];
  5541. };
  5542. +
  5543. +#ifndef CONFIG_PREEMPT_RT_BASE
  5544. +static inline void zram_lock_table(struct zram_table_entry *table)
  5545. +{
  5546. + bit_spin_lock(ZRAM_ACCESS, &table->value);
  5547. +}
  5548. +
  5549. +static inline void zram_unlock_table(struct zram_table_entry *table)
  5550. +{
  5551. + bit_spin_unlock(ZRAM_ACCESS, &table->value);
  5552. +}
  5553. +
  5554. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { }
  5555. +#else /* CONFIG_PREEMPT_RT_BASE */
  5556. +static inline void zram_lock_table(struct zram_table_entry *table)
  5557. +{
  5558. + spin_lock(&table->lock);
  5559. + __set_bit(ZRAM_ACCESS, &table->value);
  5560. +}
  5561. +
  5562. +static inline void zram_unlock_table(struct zram_table_entry *table)
  5563. +{
  5564. + __clear_bit(ZRAM_ACCESS, &table->value);
  5565. + spin_unlock(&table->lock);
  5566. +}
  5567. +
  5568. +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize)
  5569. +{
  5570. + size_t num_pages = disksize >> PAGE_SHIFT;
  5571. + size_t index;
  5572. +
  5573. + for (index = 0; index < num_pages; index++) {
  5574. + spinlock_t *lock = &meta->table[index].lock;
  5575. + spin_lock_init(lock);
  5576. + }
  5577. +}
  5578. +#endif /* CONFIG_PREEMPT_RT_BASE */
  5579. +
  5580. #endif
  5581. diff -Nur linux-4.1.39.orig/drivers/char/random.c linux-4.1.39/drivers/char/random.c
  5582. --- linux-4.1.39.orig/drivers/char/random.c 2017-03-13 21:04:36.000000000 +0100
  5583. +++ linux-4.1.39/drivers/char/random.c 2017-04-18 17:56:30.577395735 +0200
  5584. @@ -776,8 +776,6 @@
  5585. } sample;
  5586. long delta, delta2, delta3;
  5587. - preempt_disable();
  5588. -
  5589. sample.jiffies = jiffies;
  5590. sample.cycles = random_get_entropy();
  5591. sample.num = num;
  5592. @@ -818,7 +816,6 @@
  5593. */
  5594. credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
  5595. }
  5596. - preempt_enable();
  5597. }
  5598. void add_input_randomness(unsigned int type, unsigned int code,
  5599. @@ -871,28 +868,27 @@
  5600. return *(ptr + f->reg_idx++);
  5601. }
  5602. -void add_interrupt_randomness(int irq, int irq_flags)
  5603. +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
  5604. {
  5605. struct entropy_store *r;
  5606. struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness);
  5607. - struct pt_regs *regs = get_irq_regs();
  5608. unsigned long now = jiffies;
  5609. cycles_t cycles = random_get_entropy();
  5610. __u32 c_high, j_high;
  5611. - __u64 ip;
  5612. unsigned long seed;
  5613. int credit = 0;
  5614. if (cycles == 0)
  5615. - cycles = get_reg(fast_pool, regs);
  5616. + cycles = get_reg(fast_pool, NULL);
  5617. c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
  5618. j_high = (sizeof(now) > 4) ? now >> 32 : 0;
  5619. fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
  5620. fast_pool->pool[1] ^= now ^ c_high;
  5621. - ip = regs ? instruction_pointer(regs) : _RET_IP_;
  5622. + if (!ip)
  5623. + ip = _RET_IP_;
  5624. fast_pool->pool[2] ^= ip;
  5625. fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
  5626. - get_reg(fast_pool, regs);
  5627. + get_reg(fast_pool, NULL);
  5628. fast_mix(fast_pool);
  5629. add_interrupt_bench(cycles);
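After this change add_interrupt_randomness() no longer digs the instruction pointer out of get_irq_regs() itself; the caller passes it explicitly, and 0 makes the function fall back to _RET_IP_. The rest of this patch series moves the collection into the threaded part of interrupt handling, where the interrupted registers are no longer available. A hedged sketch of a caller; the handler itself is purely illustrative:

    #include <linux/interrupt.h>
    #include <linux/ptrace.h>
    #include <linux/random.h>
    #include <asm/irq_regs.h>

    static irqreturn_t my_irq_handler(int irq, void *dev_id)
    {
        struct pt_regs *regs = get_irq_regs();
        __u64 ip = regs ? instruction_pointer(regs) : 0;

        /* 0 lets add_interrupt_randomness() fall back to _RET_IP_. */
        add_interrupt_randomness(irq, 0, ip);
        return IRQ_HANDLED;
    }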
  5630. diff -Nur linux-4.1.39.orig/drivers/clk/at91/pmc.c linux-4.1.39/drivers/clk/at91/pmc.c
  5631. --- linux-4.1.39.orig/drivers/clk/at91/pmc.c 2017-03-13 21:04:36.000000000 +0100
  5632. +++ linux-4.1.39/drivers/clk/at91/pmc.c 2017-04-18 17:56:30.577395735 +0200
  5633. @@ -27,21 +27,6 @@
  5634. void __iomem *at91_pmc_base;
  5635. EXPORT_SYMBOL_GPL(at91_pmc_base);
  5636. -void at91rm9200_idle(void)
  5637. -{
  5638. - /*
  5639. - * Disable the processor clock. The processor will be automatically
  5640. - * re-enabled by an interrupt or by a reset.
  5641. - */
  5642. - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
  5643. -}
  5644. -
  5645. -void at91sam9_idle(void)
  5646. -{
  5647. - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
  5648. - cpu_do_idle();
  5649. -}
  5650. -
  5651. int of_at91_get_clk_range(struct device_node *np, const char *propname,
  5652. struct clk_range *range)
  5653. {
  5654. diff -Nur linux-4.1.39.orig/drivers/clocksource/tcb_clksrc.c linux-4.1.39/drivers/clocksource/tcb_clksrc.c
  5655. --- linux-4.1.39.orig/drivers/clocksource/tcb_clksrc.c 2017-03-13 21:04:36.000000000 +0100
  5656. +++ linux-4.1.39/drivers/clocksource/tcb_clksrc.c 2017-04-18 17:56:30.577395735 +0200
  5657. @@ -23,8 +23,7 @@
  5658. * this 32 bit free-running counter. the second channel is not used.
  5659. *
  5660. * - The third channel may be used to provide a 16-bit clockevent
  5661. - * source, used in either periodic or oneshot mode. This runs
  5662. - * at 32 KiHZ, and can handle delays of up to two seconds.
  5663. + * source, used in either periodic or oneshot mode.
  5664. *
  5665. * A boot clocksource and clockevent source are also currently needed,
  5666. * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
  5667. @@ -74,6 +73,7 @@
  5668. struct tc_clkevt_device {
  5669. struct clock_event_device clkevt;
  5670. struct clk *clk;
  5671. + u32 freq;
  5672. void __iomem *regs;
  5673. };
  5674. @@ -82,13 +82,6 @@
  5675. return container_of(clkevt, struct tc_clkevt_device, clkevt);
  5676. }
  5677. -/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
  5678. - * because using one of the divided clocks would usually mean the
  5679. - * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
  5680. - *
  5681. - * A divided clock could be good for high resolution timers, since
  5682. - * 30.5 usec resolution can seem "low".
  5683. - */
  5684. static u32 timer_clock;
  5685. static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
  5686. @@ -111,11 +104,12 @@
  5687. case CLOCK_EVT_MODE_PERIODIC:
  5688. clk_enable(tcd->clk);
  5689. - /* slow clock, count up to RC, then irq and restart */
  5690. + /* count up to RC, then irq and restart */
  5691. __raw_writel(timer_clock
  5692. | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  5693. regs + ATMEL_TC_REG(2, CMR));
  5694. - __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
  5695. + __raw_writel((tcd->freq + HZ / 2) / HZ,
  5696. + tcaddr + ATMEL_TC_REG(2, RC));
  5697. /* Enable clock and interrupts on RC compare */
  5698. __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
  5699. @@ -128,7 +122,7 @@
  5700. case CLOCK_EVT_MODE_ONESHOT:
  5701. clk_enable(tcd->clk);
  5702. - /* slow clock, count up to RC, then irq and stop */
  5703. + /* count up to RC, then irq and stop */
  5704. __raw_writel(timer_clock | ATMEL_TC_CPCSTOP
  5705. | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
  5706. regs + ATMEL_TC_REG(2, CMR));
  5707. @@ -157,8 +151,12 @@
  5708. .name = "tc_clkevt",
  5709. .features = CLOCK_EVT_FEAT_PERIODIC
  5710. | CLOCK_EVT_FEAT_ONESHOT,
  5711. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  5712. /* Should be lower than at91rm9200's system timer */
  5713. .rating = 125,
  5714. +#else
  5715. + .rating = 200,
  5716. +#endif
  5717. .set_next_event = tc_next_event,
  5718. .set_mode = tc_mode,
  5719. },
  5720. @@ -178,8 +176,9 @@
  5721. return IRQ_NONE;
  5722. }
  5723. -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
  5724. +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
  5725. {
  5726. + unsigned divisor = atmel_tc_divisors[divisor_idx];
  5727. int ret;
  5728. struct clk *t2_clk = tc->clk[2];
  5729. int irq = tc->irq[2];
  5730. @@ -193,7 +192,11 @@
  5731. clkevt.regs = tc->regs;
  5732. clkevt.clk = t2_clk;
  5733. - timer_clock = clk32k_divisor_idx;
  5734. + timer_clock = divisor_idx;
  5735. + if (!divisor)
  5736. + clkevt.freq = 32768;
  5737. + else
  5738. + clkevt.freq = clk_get_rate(t2_clk) / divisor;
  5739. clkevt.clkevt.cpumask = cpumask_of(0);
  5740. @@ -203,7 +206,7 @@
  5741. return ret;
  5742. }
  5743. - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
  5744. + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);
  5745. return ret;
  5746. }
  5747. @@ -340,7 +343,11 @@
  5748. goto err_disable_t1;
  5749. /* channel 2: periodic and oneshot timer support */
  5750. +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  5751. ret = setup_clkevents(tc, clk32k_divisor_idx);
  5752. +#else
  5753. + ret = setup_clkevents(tc, best_divisor_idx);
  5754. +#endif
  5755. if (ret)
  5756. goto err_unregister_clksrc;
  5757. diff -Nur linux-4.1.39.orig/drivers/clocksource/timer-atmel-pit.c linux-4.1.39/drivers/clocksource/timer-atmel-pit.c
  5758. --- linux-4.1.39.orig/drivers/clocksource/timer-atmel-pit.c 2017-03-13 21:04:36.000000000 +0100
  5759. +++ linux-4.1.39/drivers/clocksource/timer-atmel-pit.c 2017-04-18 17:56:30.577395735 +0200
  5760. @@ -90,6 +90,7 @@
  5761. return elapsed;
  5762. }
  5763. +static struct irqaction at91sam926x_pit_irq;
  5764. /*
  5765. * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
  5766. */
  5767. @@ -100,6 +101,8 @@
  5768. switch (mode) {
  5769. case CLOCK_EVT_MODE_PERIODIC:
  5770. + /* Set up irq handler */
  5771. + setup_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
  5772. /* update clocksource counter */
  5773. data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
  5774. pit_write(data->base, AT91_PIT_MR,
  5775. @@ -113,6 +116,7 @@
  5776. /* disable irq, leaving the clocksource active */
  5777. pit_write(data->base, AT91_PIT_MR,
  5778. (data->cycle - 1) | AT91_PIT_PITEN);
  5779. + remove_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
  5780. break;
  5781. case CLOCK_EVT_MODE_RESUME:
  5782. break;
  5783. diff -Nur linux-4.1.39.orig/drivers/clocksource/timer-atmel-st.c linux-4.1.39/drivers/clocksource/timer-atmel-st.c
  5784. --- linux-4.1.39.orig/drivers/clocksource/timer-atmel-st.c 2017-03-13 21:04:36.000000000 +0100
  5785. +++ linux-4.1.39/drivers/clocksource/timer-atmel-st.c 2017-04-18 17:56:30.577395735 +0200
  5786. @@ -131,6 +131,7 @@
  5787. break;
  5788. case CLOCK_EVT_MODE_SHUTDOWN:
  5789. case CLOCK_EVT_MODE_UNUSED:
  5790. + remove_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq);
  5791. case CLOCK_EVT_MODE_RESUME:
  5792. irqmask = 0;
  5793. break;
  5794. diff -Nur linux-4.1.39.orig/drivers/cpufreq/cpufreq.c linux-4.1.39/drivers/cpufreq/cpufreq.c
  5795. --- linux-4.1.39.orig/drivers/cpufreq/cpufreq.c 2017-03-13 21:04:36.000000000 +0100
  5796. +++ linux-4.1.39/drivers/cpufreq/cpufreq.c 2017-04-18 17:56:30.577395735 +0200
  5797. @@ -64,12 +64,6 @@
  5798. return cpufreq_driver->target_index || cpufreq_driver->target;
  5799. }
  5800. -/*
  5801. - * rwsem to guarantee that cpufreq driver module doesn't unload during critical
  5802. - * sections
  5803. - */
  5804. -static DECLARE_RWSEM(cpufreq_rwsem);
  5805. -
  5806. /* internal prototypes */
  5807. static int __cpufreq_governor(struct cpufreq_policy *policy,
  5808. unsigned int event);
  5809. @@ -215,9 +209,6 @@
  5810. if (cpu >= nr_cpu_ids)
  5811. return NULL;
  5812. - if (!down_read_trylock(&cpufreq_rwsem))
  5813. - return NULL;
  5814. -
  5815. /* get the cpufreq driver */
  5816. read_lock_irqsave(&cpufreq_driver_lock, flags);
  5817. @@ -230,9 +221,6 @@
  5818. read_unlock_irqrestore(&cpufreq_driver_lock, flags);
  5819. - if (!policy)
  5820. - up_read(&cpufreq_rwsem);
  5821. -
  5822. return policy;
  5823. }
  5824. EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
  5825. @@ -240,7 +228,6 @@
  5826. void cpufreq_cpu_put(struct cpufreq_policy *policy)
  5827. {
  5828. kobject_put(&policy->kobj);
  5829. - up_read(&cpufreq_rwsem);
  5830. }
  5831. EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
  5832. @@ -765,9 +752,6 @@
  5833. struct freq_attr *fattr = to_attr(attr);
  5834. ssize_t ret;
  5835. - if (!down_read_trylock(&cpufreq_rwsem))
  5836. - return -EINVAL;
  5837. -
  5838. down_read(&policy->rwsem);
  5839. if (fattr->show)
  5840. @@ -776,7 +760,6 @@
  5841. ret = -EIO;
  5842. up_read(&policy->rwsem);
  5843. - up_read(&cpufreq_rwsem);
  5844. return ret;
  5845. }
  5846. @@ -793,9 +776,6 @@
  5847. if (!cpu_online(policy->cpu))
  5848. goto unlock;
  5849. - if (!down_read_trylock(&cpufreq_rwsem))
  5850. - goto unlock;
  5851. -
  5852. down_write(&policy->rwsem);
  5853. if (fattr->store)
  5854. @@ -804,8 +784,6 @@
  5855. ret = -EIO;
  5856. up_write(&policy->rwsem);
  5857. -
  5858. - up_read(&cpufreq_rwsem);
  5859. unlock:
  5860. put_online_cpus();
  5861. @@ -1117,16 +1095,12 @@
  5862. if (unlikely(policy))
  5863. return 0;
  5864. - if (!down_read_trylock(&cpufreq_rwsem))
  5865. - return 0;
  5866. -
  5867. /* Check if this cpu was hot-unplugged earlier and has siblings */
  5868. read_lock_irqsave(&cpufreq_driver_lock, flags);
  5869. for_each_policy(policy) {
  5870. if (cpumask_test_cpu(cpu, policy->related_cpus)) {
  5871. read_unlock_irqrestore(&cpufreq_driver_lock, flags);
  5872. ret = cpufreq_add_policy_cpu(policy, cpu, dev);
  5873. - up_read(&cpufreq_rwsem);
  5874. return ret;
  5875. }
  5876. }
  5877. @@ -1269,8 +1243,6 @@
  5878. kobject_uevent(&policy->kobj, KOBJ_ADD);
  5879. - up_read(&cpufreq_rwsem);
  5880. -
  5881. /* Callback for handling stuff after policy is ready */
  5882. if (cpufreq_driver->ready)
  5883. cpufreq_driver->ready(policy);
  5884. @@ -1304,8 +1276,6 @@
  5885. cpufreq_policy_free(policy);
  5886. nomem_out:
  5887. - up_read(&cpufreq_rwsem);
  5888. -
  5889. return ret;
  5890. }
  5891. @@ -2499,19 +2469,20 @@
  5892. pr_debug("unregistering driver %s\n", driver->name);
  5893. + /* Protect against concurrent cpu hotplug */
  5894. + get_online_cpus();
  5895. subsys_interface_unregister(&cpufreq_interface);
  5896. if (cpufreq_boost_supported())
  5897. cpufreq_sysfs_remove_file(&boost.attr);
  5898. unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
  5899. - down_write(&cpufreq_rwsem);
  5900. write_lock_irqsave(&cpufreq_driver_lock, flags);
  5901. cpufreq_driver = NULL;
  5902. write_unlock_irqrestore(&cpufreq_driver_lock, flags);
  5903. - up_write(&cpufreq_rwsem);
  5904. + put_online_cpus();
  5905. return 0;
  5906. }
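The cpufreq hunks drop the driver-private cpufreq_rwsem and instead use get_online_cpus()/put_online_cpus() to keep CPUs from coming or going while the driver is unregistered. A minimal sketch of that hotplug-exclusion pattern, with a placeholder teardown step:

    #include <linux/cpu.h>
    #include <linux/kernel.h>

    static void my_unregister_driver(void)
    {
        get_online_cpus();      /* block CPU hotplug during teardown */

        /* ... unregister interfaces and clear the driver pointer here ... */
        pr_debug("driver gone\n");

        put_online_cpus();
    }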
  5907. diff -Nur linux-4.1.39.orig/drivers/cpufreq/Kconfig.x86 linux-4.1.39/drivers/cpufreq/Kconfig.x86
  5908. --- linux-4.1.39.orig/drivers/cpufreq/Kconfig.x86 2017-03-13 21:04:36.000000000 +0100
  5909. +++ linux-4.1.39/drivers/cpufreq/Kconfig.x86 2017-04-18 17:56:30.577395735 +0200
  5910. @@ -123,7 +123,7 @@
  5911. config X86_POWERNOW_K8
  5912. tristate "AMD Opteron/Athlon64 PowerNow!"
  5913. - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
  5914. + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
  5915. help
  5916. This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
  5917. Support for K10 and newer processors is now in acpi-cpufreq.
  5918. diff -Nur linux-4.1.39.orig/drivers/gpio/gpio-omap.c linux-4.1.39/drivers/gpio/gpio-omap.c
  5919. --- linux-4.1.39.orig/drivers/gpio/gpio-omap.c 2017-03-13 21:04:36.000000000 +0100
  5920. +++ linux-4.1.39/drivers/gpio/gpio-omap.c 2017-04-18 17:56:30.577395735 +0200
  5921. @@ -29,6 +29,7 @@
  5922. #include <linux/platform_data/gpio-omap.h>
  5923. #define OFF_MODE 1
  5924. +#define OMAP4_GPIO_DEBOUNCINGTIME_MASK 0xFF
  5925. static LIST_HEAD(omap_gpio_list);
  5926. @@ -50,14 +51,15 @@
  5927. struct gpio_bank {
  5928. struct list_head node;
  5929. void __iomem *base;
  5930. - u16 irq;
  5931. + int irq;
  5932. u32 non_wakeup_gpios;
  5933. u32 enabled_non_wakeup_gpios;
  5934. struct gpio_regs context;
  5935. u32 saved_datain;
  5936. u32 level_mask;
  5937. u32 toggle_mask;
  5938. - spinlock_t lock;
  5939. + raw_spinlock_t lock;
  5940. + raw_spinlock_t wa_lock;
  5941. struct gpio_chip chip;
  5942. struct clk *dbck;
  5943. u32 mod_usage;
  5944. @@ -67,7 +69,7 @@
  5945. struct device *dev;
  5946. bool is_mpuio;
  5947. bool dbck_flag;
  5948. - bool loses_context;
  5949. +
  5950. bool context_valid;
  5951. int stride;
  5952. u32 width;
  5953. @@ -175,7 +177,7 @@
  5954. static inline void omap_gpio_dbck_enable(struct gpio_bank *bank)
  5955. {
  5956. if (bank->dbck_enable_mask && !bank->dbck_enabled) {
  5957. - clk_prepare_enable(bank->dbck);
  5958. + clk_enable(bank->dbck);
  5959. bank->dbck_enabled = true;
  5960. writel_relaxed(bank->dbck_enable_mask,
  5961. @@ -193,7 +195,7 @@
  5962. */
  5963. writel_relaxed(0, bank->base + bank->regs->debounce_en);
  5964. - clk_disable_unprepare(bank->dbck);
  5965. + clk_disable(bank->dbck);
  5966. bank->dbck_enabled = false;
  5967. }
  5968. }
  5969. @@ -204,8 +206,9 @@
  5970. * @offset: the gpio number on this @bank
  5971. * @debounce: debounce time to use
  5972. *
  5973. - * OMAP's debounce time is in 31us steps so we need
  5974. - * to convert and round up to the closest unit.
  5975. + * OMAP's debounce time is in 31us steps
  5976. + * <debounce time> = (GPIO_DEBOUNCINGTIME[7:0].DEBOUNCETIME + 1) x 31
  5977. + * so we need to convert and round up to the closest unit.
  5978. */
  5979. static void omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
  5980. unsigned debounce)
  5981. @@ -213,34 +216,33 @@
  5982. void __iomem *reg;
  5983. u32 val;
  5984. u32 l;
  5985. + bool enable = !!debounce;
  5986. if (!bank->dbck_flag)
  5987. return;
  5988. - if (debounce < 32)
  5989. - debounce = 0x01;
  5990. - else if (debounce > 7936)
  5991. - debounce = 0xff;
  5992. - else
  5993. - debounce = (debounce / 0x1f) - 1;
  5994. + if (enable) {
  5995. + debounce = DIV_ROUND_UP(debounce, 31) - 1;
  5996. + debounce &= OMAP4_GPIO_DEBOUNCINGTIME_MASK;
  5997. + }
  5998. l = BIT(offset);
  5999. - clk_prepare_enable(bank->dbck);
  6000. + clk_enable(bank->dbck);
  6001. reg = bank->base + bank->regs->debounce;
  6002. writel_relaxed(debounce, reg);
  6003. reg = bank->base + bank->regs->debounce_en;
  6004. val = readl_relaxed(reg);
  6005. - if (debounce)
  6006. + if (enable)
  6007. val |= l;
  6008. else
  6009. val &= ~l;
  6010. bank->dbck_enable_mask = val;
  6011. writel_relaxed(val, reg);
  6012. - clk_disable_unprepare(bank->dbck);
  6013. + clk_disable(bank->dbck);
  6014. /*
  6015. * Enable debounce clock per module.
  6016. * This call is mandatory because in omap_gpio_request() when
  6017. @@ -285,7 +287,7 @@
  6018. bank->context.debounce = 0;
  6019. writel_relaxed(bank->context.debounce, bank->base +
  6020. bank->regs->debounce);
  6021. - clk_disable_unprepare(bank->dbck);
  6022. + clk_disable(bank->dbck);
  6023. bank->dbck_enabled = false;
  6024. }
  6025. }
  6026. @@ -488,9 +490,6 @@
  6027. unsigned long flags;
  6028. unsigned offset = d->hwirq;
  6029. - if (!BANK_USED(bank))
  6030. - pm_runtime_get_sync(bank->dev);
  6031. -
  6032. if (type & ~IRQ_TYPE_SENSE_MASK)
  6033. return -EINVAL;
  6034. @@ -498,20 +497,28 @@
  6035. (type & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH)))
  6036. return -EINVAL;
  6037. - spin_lock_irqsave(&bank->lock, flags);
  6038. + raw_spin_lock_irqsave(&bank->lock, flags);
  6039. retval = omap_set_gpio_triggering(bank, offset, type);
  6040. + if (retval) {
  6041. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6042. + goto error;
  6043. + }
  6044. omap_gpio_init_irq(bank, offset);
  6045. if (!omap_gpio_is_input(bank, offset)) {
  6046. - spin_unlock_irqrestore(&bank->lock, flags);
  6047. - return -EINVAL;
  6048. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6049. + retval = -EINVAL;
  6050. + goto error;
  6051. }
  6052. - spin_unlock_irqrestore(&bank->lock, flags);
  6053. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6054. if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
  6055. __irq_set_handler_locked(d->irq, handle_level_irq);
  6056. else if (type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
  6057. __irq_set_handler_locked(d->irq, handle_edge_irq);
  6058. + return 0;
  6059. +
  6060. +error:
  6061. return retval;
  6062. }
  6063. @@ -626,34 +633,30 @@
  6064. return -EINVAL;
  6065. }
  6066. - spin_lock_irqsave(&bank->lock, flags);
  6067. + raw_spin_lock_irqsave(&bank->lock, flags);
  6068. if (enable)
  6069. bank->context.wake_en |= gpio_bit;
  6070. else
  6071. bank->context.wake_en &= ~gpio_bit;
  6072. writel_relaxed(bank->context.wake_en, bank->base + bank->regs->wkup_en);
  6073. - spin_unlock_irqrestore(&bank->lock, flags);
  6074. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6075. return 0;
  6076. }
  6077. -static void omap_reset_gpio(struct gpio_bank *bank, unsigned offset)
  6078. -{
  6079. - omap_set_gpio_direction(bank, offset, 1);
  6080. - omap_set_gpio_irqenable(bank, offset, 0);
  6081. - omap_clear_gpio_irqstatus(bank, offset);
  6082. - omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6083. - omap_clear_gpio_debounce(bank, offset);
  6084. -}
  6085. -
  6086. /* Use disable_irq_wake() and enable_irq_wake() functions from drivers */
  6087. static int omap_gpio_wake_enable(struct irq_data *d, unsigned int enable)
  6088. {
  6089. struct gpio_bank *bank = omap_irq_data_get_bank(d);
  6090. unsigned offset = d->hwirq;
  6091. + int ret;
  6092. +
  6093. + ret = omap_set_gpio_wakeup(bank, offset, enable);
  6094. + if (!ret)
  6095. + ret = irq_set_irq_wake(bank->irq, enable);
  6096. - return omap_set_gpio_wakeup(bank, offset, enable);
  6097. + return ret;
  6098. }
  6099. static int omap_gpio_request(struct gpio_chip *chip, unsigned offset)
  6100. @@ -668,17 +671,10 @@
  6101. if (!BANK_USED(bank))
  6102. pm_runtime_get_sync(bank->dev);
  6103. - spin_lock_irqsave(&bank->lock, flags);
  6104. - /* Set trigger to none. You need to enable the desired trigger with
  6105. - * request_irq() or set_irq_type(). Only do this if the IRQ line has
  6106. - * not already been requested.
  6107. - */
  6108. - if (!LINE_USED(bank->irq_usage, offset)) {
  6109. - omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6110. - omap_enable_gpio_module(bank, offset);
  6111. - }
  6112. + raw_spin_lock_irqsave(&bank->lock, flags);
  6113. + omap_enable_gpio_module(bank, offset);
  6114. bank->mod_usage |= BIT(offset);
  6115. - spin_unlock_irqrestore(&bank->lock, flags);
  6116. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6117. return 0;
  6118. }
  6119. @@ -688,11 +684,14 @@
  6120. struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip);
  6121. unsigned long flags;
  6122. - spin_lock_irqsave(&bank->lock, flags);
  6123. + raw_spin_lock_irqsave(&bank->lock, flags);
  6124. bank->mod_usage &= ~(BIT(offset));
  6125. + if (!LINE_USED(bank->irq_usage, offset)) {
  6126. + omap_set_gpio_direction(bank, offset, 1);
  6127. + omap_clear_gpio_debounce(bank, offset);
  6128. + }
  6129. omap_disable_gpio_module(bank, offset);
  6130. - omap_reset_gpio(bank, offset);
  6131. - spin_unlock_irqrestore(&bank->lock, flags);
  6132. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6133. /*
  6134. * If this is the last gpio to be freed in the bank,
  6135. @@ -711,29 +710,27 @@
  6136. * line's interrupt handler has been run, we may miss some nested
  6137. * interrupts.
  6138. */
  6139. -static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc)
  6140. +static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank)
  6141. {
  6142. void __iomem *isr_reg = NULL;
  6143. u32 isr;
  6144. unsigned int bit;
  6145. - struct gpio_bank *bank;
  6146. - int unmasked = 0;
  6147. - struct irq_chip *irqchip = irq_desc_get_chip(desc);
  6148. - struct gpio_chip *chip = irq_get_handler_data(irq);
  6149. + struct gpio_bank *bank = gpiobank;
  6150. + unsigned long wa_lock_flags;
  6151. + unsigned long lock_flags;
  6152. - chained_irq_enter(irqchip, desc);
  6153. -
  6154. - bank = container_of(chip, struct gpio_bank, chip);
  6155. isr_reg = bank->base + bank->regs->irqstatus;
  6156. - pm_runtime_get_sync(bank->dev);
  6157. -
  6158. if (WARN_ON(!isr_reg))
  6159. goto exit;
  6160. + pm_runtime_get_sync(bank->dev);
  6161. +
  6162. while (1) {
  6163. u32 isr_saved, level_mask = 0;
  6164. u32 enabled;
  6165. + raw_spin_lock_irqsave(&bank->lock, lock_flags);
  6166. +
  6167. enabled = omap_get_gpio_irqbank_mask(bank);
  6168. isr_saved = isr = readl_relaxed(isr_reg) & enabled;
  6169. @@ -747,12 +744,7 @@
  6170. omap_clear_gpio_irqbank(bank, isr_saved & ~level_mask);
  6171. omap_enable_gpio_irqbank(bank, isr_saved & ~level_mask);
  6172. - /* if there is only edge sensitive GPIO pin interrupts
  6173. - configured, we could unmask GPIO bank interrupt immediately */
  6174. - if (!level_mask && !unmasked) {
  6175. - unmasked = 1;
  6176. - chained_irq_exit(irqchip, desc);
  6177. - }
  6178. + raw_spin_unlock_irqrestore(&bank->lock, lock_flags);
  6179. if (!isr)
  6180. break;
  6181. @@ -761,6 +753,7 @@
  6182. bit = __ffs(isr);
  6183. isr &= ~(BIT(bit));
  6184. + raw_spin_lock_irqsave(&bank->lock, lock_flags);
  6185. /*
  6186. * Some chips can't respond to both rising and falling
  6187. * at the same time. If this irq was requested with
  6188. @@ -771,18 +764,20 @@
  6189. if (bank->toggle_mask & (BIT(bit)))
  6190. omap_toggle_gpio_edge_triggering(bank, bit);
  6191. + raw_spin_unlock_irqrestore(&bank->lock, lock_flags);
  6192. +
  6193. + raw_spin_lock_irqsave(&bank->wa_lock, wa_lock_flags);
  6194. +
  6195. generic_handle_irq(irq_find_mapping(bank->chip.irqdomain,
  6196. bit));
  6197. +
  6198. + raw_spin_unlock_irqrestore(&bank->wa_lock,
  6199. + wa_lock_flags);
  6200. }
  6201. }
  6202. - /* if bank has any level sensitive GPIO pin interrupt
  6203. - configured, we must unmask the bank interrupt only after
  6204. - handler(s) are executed in order to avoid spurious bank
  6205. - interrupt */
  6206. exit:
  6207. - if (!unmasked)
  6208. - chained_irq_exit(irqchip, desc);
  6209. pm_runtime_put(bank->dev);
  6210. + return IRQ_HANDLED;
  6211. }
  6212. static unsigned int omap_gpio_irq_startup(struct irq_data *d)
  6213. @@ -791,15 +786,22 @@
  6214. unsigned long flags;
  6215. unsigned offset = d->hwirq;
  6216. - if (!BANK_USED(bank))
  6217. - pm_runtime_get_sync(bank->dev);
  6218. + raw_spin_lock_irqsave(&bank->lock, flags);
  6219. - spin_lock_irqsave(&bank->lock, flags);
  6220. - omap_gpio_init_irq(bank, offset);
  6221. - spin_unlock_irqrestore(&bank->lock, flags);
  6222. + if (!LINE_USED(bank->mod_usage, offset))
  6223. + omap_set_gpio_direction(bank, offset, 1);
  6224. + else if (!omap_gpio_is_input(bank, offset))
  6225. + goto err;
  6226. + omap_enable_gpio_module(bank, offset);
  6227. + bank->irq_usage |= BIT(offset);
  6228. +
  6229. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6230. omap_gpio_unmask_irq(d);
  6231. return 0;
  6232. +err:
  6233. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6234. + return -EINVAL;
  6235. }
  6236. static void omap_gpio_irq_shutdown(struct irq_data *d)
  6237. @@ -808,11 +810,28 @@
  6238. unsigned long flags;
  6239. unsigned offset = d->hwirq;
  6240. - spin_lock_irqsave(&bank->lock, flags);
  6241. + raw_spin_lock_irqsave(&bank->lock, flags);
  6242. bank->irq_usage &= ~(BIT(offset));
  6243. + omap_set_gpio_irqenable(bank, offset, 0);
  6244. + omap_clear_gpio_irqstatus(bank, offset);
  6245. + omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6246. + if (!LINE_USED(bank->mod_usage, offset))
  6247. + omap_clear_gpio_debounce(bank, offset);
  6248. omap_disable_gpio_module(bank, offset);
  6249. - omap_reset_gpio(bank, offset);
  6250. - spin_unlock_irqrestore(&bank->lock, flags);
  6251. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6252. +}
  6253. +
  6254. +static void omap_gpio_irq_bus_lock(struct irq_data *data)
  6255. +{
  6256. + struct gpio_bank *bank = omap_irq_data_get_bank(data);
  6257. +
  6258. + if (!BANK_USED(bank))
  6259. + pm_runtime_get_sync(bank->dev);
  6260. +}
  6261. +
  6262. +static void gpio_irq_bus_sync_unlock(struct irq_data *data)
  6263. +{
  6264. + struct gpio_bank *bank = omap_irq_data_get_bank(data);
  6265. /*
  6266. * If this is the last IRQ to be freed in the bank,
  6267. @@ -836,10 +855,10 @@
  6268. unsigned offset = d->hwirq;
  6269. unsigned long flags;
  6270. - spin_lock_irqsave(&bank->lock, flags);
  6271. + raw_spin_lock_irqsave(&bank->lock, flags);
  6272. omap_set_gpio_irqenable(bank, offset, 0);
  6273. omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
  6274. - spin_unlock_irqrestore(&bank->lock, flags);
  6275. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6276. }
  6277. static void omap_gpio_unmask_irq(struct irq_data *d)
  6278. @@ -849,7 +868,7 @@
  6279. u32 trigger = irqd_get_trigger_type(d);
  6280. unsigned long flags;
  6281. - spin_lock_irqsave(&bank->lock, flags);
  6282. + raw_spin_lock_irqsave(&bank->lock, flags);
  6283. if (trigger)
  6284. omap_set_gpio_triggering(bank, offset, trigger);
  6285. @@ -861,7 +880,7 @@
  6286. }
  6287. omap_set_gpio_irqenable(bank, offset, 1);
  6288. - spin_unlock_irqrestore(&bank->lock, flags);
  6289. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6290. }
  6291. /*---------------------------------------------------------------------*/
  6292. @@ -874,9 +893,9 @@
  6293. OMAP_MPUIO_GPIO_MASKIT / bank->stride;
  6294. unsigned long flags;
  6295. - spin_lock_irqsave(&bank->lock, flags);
  6296. + raw_spin_lock_irqsave(&bank->lock, flags);
  6297. writel_relaxed(0xffff & ~bank->context.wake_en, mask_reg);
  6298. - spin_unlock_irqrestore(&bank->lock, flags);
  6299. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6300. return 0;
  6301. }
  6302. @@ -889,9 +908,9 @@
  6303. OMAP_MPUIO_GPIO_MASKIT / bank->stride;
  6304. unsigned long flags;
  6305. - spin_lock_irqsave(&bank->lock, flags);
  6306. + raw_spin_lock_irqsave(&bank->lock, flags);
  6307. writel_relaxed(bank->context.wake_en, mask_reg);
  6308. - spin_unlock_irqrestore(&bank->lock, flags);
  6309. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6310. return 0;
  6311. }
  6312. @@ -937,9 +956,9 @@
  6313. bank = container_of(chip, struct gpio_bank, chip);
  6314. reg = bank->base + bank->regs->direction;
  6315. - spin_lock_irqsave(&bank->lock, flags);
  6316. + raw_spin_lock_irqsave(&bank->lock, flags);
  6317. dir = !!(readl_relaxed(reg) & BIT(offset));
  6318. - spin_unlock_irqrestore(&bank->lock, flags);
  6319. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6320. return dir;
  6321. }
  6322. @@ -949,9 +968,9 @@
  6323. unsigned long flags;
  6324. bank = container_of(chip, struct gpio_bank, chip);
  6325. - spin_lock_irqsave(&bank->lock, flags);
  6326. + raw_spin_lock_irqsave(&bank->lock, flags);
  6327. omap_set_gpio_direction(bank, offset, 1);
  6328. - spin_unlock_irqrestore(&bank->lock, flags);
  6329. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6330. return 0;
  6331. }
  6332. @@ -973,10 +992,10 @@
  6333. unsigned long flags;
  6334. bank = container_of(chip, struct gpio_bank, chip);
  6335. - spin_lock_irqsave(&bank->lock, flags);
  6336. + raw_spin_lock_irqsave(&bank->lock, flags);
  6337. bank->set_dataout(bank, offset, value);
  6338. omap_set_gpio_direction(bank, offset, 0);
  6339. - spin_unlock_irqrestore(&bank->lock, flags);
  6340. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6341. return 0;
  6342. }
  6343. @@ -988,9 +1007,9 @@
  6344. bank = container_of(chip, struct gpio_bank, chip);
  6345. - spin_lock_irqsave(&bank->lock, flags);
  6346. + raw_spin_lock_irqsave(&bank->lock, flags);
  6347. omap2_set_gpio_debounce(bank, offset, debounce);
  6348. - spin_unlock_irqrestore(&bank->lock, flags);
  6349. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6350. return 0;
  6351. }
  6352. @@ -1001,9 +1020,9 @@
  6353. unsigned long flags;
  6354. bank = container_of(chip, struct gpio_bank, chip);
  6355. - spin_lock_irqsave(&bank->lock, flags);
  6356. + raw_spin_lock_irqsave(&bank->lock, flags);
  6357. bank->set_dataout(bank, offset, value);
  6358. - spin_unlock_irqrestore(&bank->lock, flags);
  6359. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6360. }
  6361. /*---------------------------------------------------------------------*/
  6362. @@ -1048,10 +1067,6 @@
  6363. /* Initialize interface clk ungated, module enabled */
  6364. if (bank->regs->ctrl)
  6365. writel_relaxed(0, base + bank->regs->ctrl);
  6366. -
  6367. - bank->dbck = clk_get(bank->dev, "dbclk");
  6368. - if (IS_ERR(bank->dbck))
  6369. - dev_err(bank->dev, "Could not get gpio dbck\n");
  6370. }
  6371. static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
  6372. @@ -1080,7 +1095,6 @@
  6373. } else {
  6374. bank->chip.label = "gpio";
  6375. bank->chip.base = gpio;
  6376. - gpio += bank->width;
  6377. }
  6378. bank->chip.ngpio = bank->width;
  6379. @@ -1090,6 +1104,9 @@
  6380. return ret;
  6381. }
  6382. + if (!bank->is_mpuio)
  6383. + gpio += bank->width;
  6384. +
  6385. #ifdef CONFIG_ARCH_OMAP1
  6386. /*
  6387. * REVISIT: Once we have OMAP1 supporting SPARSE_IRQ, we can drop
  6388. @@ -1112,7 +1129,7 @@
  6389. }
  6390. ret = gpiochip_irqchip_add(&bank->chip, irqc,
  6391. - irq_base, omap_gpio_irq_handler,
  6392. + irq_base, handle_bad_irq,
  6393. IRQ_TYPE_NONE);
  6394. if (ret) {
  6395. @@ -1121,10 +1138,14 @@
  6396. return -ENODEV;
  6397. }
  6398. - gpiochip_set_chained_irqchip(&bank->chip, irqc,
  6399. - bank->irq, omap_gpio_irq_handler);
  6400. + gpiochip_set_chained_irqchip(&bank->chip, irqc, bank->irq, NULL);
  6401. - return 0;
  6402. + ret = devm_request_irq(bank->dev, bank->irq, omap_gpio_irq_handler,
  6403. + 0, dev_name(bank->dev), bank);
  6404. + if (ret)
  6405. + gpiochip_remove(&bank->chip);
  6406. +
  6407. + return ret;
  6408. }
  6409. static const struct of_device_id omap_gpio_match[];
  6410. @@ -1163,17 +1184,23 @@
  6411. irqc->irq_unmask = omap_gpio_unmask_irq,
  6412. irqc->irq_set_type = omap_gpio_irq_type,
  6413. irqc->irq_set_wake = omap_gpio_wake_enable,
  6414. + irqc->irq_bus_lock = omap_gpio_irq_bus_lock,
  6415. + irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock,
  6416. irqc->name = dev_name(&pdev->dev);
  6417. - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
  6418. - if (unlikely(!res)) {
  6419. - dev_err(dev, "Invalid IRQ resource\n");
  6420. - return -ENODEV;
  6421. + bank->irq = platform_get_irq(pdev, 0);
  6422. + if (bank->irq <= 0) {
  6423. + if (!bank->irq)
  6424. + bank->irq = -ENXIO;
  6425. + if (bank->irq != -EPROBE_DEFER)
  6426. + dev_err(dev,
  6427. + "can't get irq resource ret=%d\n", bank->irq);
  6428. + return bank->irq;
  6429. }
  6430. - bank->irq = res->start;
  6431. bank->dev = dev;
  6432. bank->chip.dev = dev;
  6433. + bank->chip.owner = THIS_MODULE;
  6434. bank->dbck_flag = pdata->dbck_flag;
  6435. bank->stride = pdata->bank_stride;
  6436. bank->width = pdata->bank_width;
  6437. @@ -1183,15 +1210,9 @@
  6438. #ifdef CONFIG_OF_GPIO
  6439. bank->chip.of_node = of_node_get(node);
  6440. #endif
  6441. - if (node) {
  6442. - if (!of_property_read_bool(node, "ti,gpio-always-on"))
  6443. - bank->loses_context = true;
  6444. - } else {
  6445. - bank->loses_context = pdata->loses_context;
  6446. -
  6447. - if (bank->loses_context)
  6448. - bank->get_context_loss_count =
  6449. - pdata->get_context_loss_count;
  6450. + if (!node) {
  6451. + bank->get_context_loss_count =
  6452. + pdata->get_context_loss_count;
  6453. }
  6454. if (bank->regs->set_dataout && bank->regs->clr_dataout)
  6455. @@ -1199,16 +1220,27 @@
  6456. else
  6457. bank->set_dataout = omap_set_gpio_dataout_mask;
  6458. - spin_lock_init(&bank->lock);
  6459. + raw_spin_lock_init(&bank->lock);
  6460. + raw_spin_lock_init(&bank->wa_lock);
  6461. /* Static mapping, never released */
  6462. res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  6463. bank->base = devm_ioremap_resource(dev, res);
  6464. if (IS_ERR(bank->base)) {
  6465. - irq_domain_remove(bank->chip.irqdomain);
  6466. return PTR_ERR(bank->base);
  6467. }
  6468. + if (bank->dbck_flag) {
  6469. + bank->dbck = devm_clk_get(bank->dev, "dbclk");
  6470. + if (IS_ERR(bank->dbck)) {
  6471. + dev_err(bank->dev,
  6472. + "Could not get gpio dbck. Disable debounce\n");
  6473. + bank->dbck_flag = false;
  6474. + } else {
  6475. + clk_prepare(bank->dbck);
  6476. + }
  6477. + }
  6478. +
  6479. platform_set_drvdata(pdev, bank);
  6480. pm_runtime_enable(bank->dev);
  6481. @@ -1221,8 +1253,11 @@
  6482. omap_gpio_mod_init(bank);
  6483. ret = omap_gpio_chip_init(bank, irqc);
  6484. - if (ret)
  6485. + if (ret) {
  6486. + pm_runtime_put_sync(bank->dev);
  6487. + pm_runtime_disable(bank->dev);
  6488. return ret;
  6489. + }
  6490. omap_gpio_show_rev(bank);
  6491. @@ -1233,6 +1268,19 @@
  6492. return 0;
  6493. }
  6494. +static int omap_gpio_remove(struct platform_device *pdev)
  6495. +{
  6496. + struct gpio_bank *bank = platform_get_drvdata(pdev);
  6497. +
  6498. + list_del(&bank->node);
  6499. + gpiochip_remove(&bank->chip);
  6500. + pm_runtime_disable(bank->dev);
  6501. + if (bank->dbck_flag)
  6502. + clk_unprepare(bank->dbck);
  6503. +
  6504. + return 0;
  6505. +}
  6506. +
  6507. #ifdef CONFIG_ARCH_OMAP2PLUS
  6508. #if defined(CONFIG_PM)
  6509. @@ -1246,7 +1294,7 @@
  6510. unsigned long flags;
  6511. u32 wake_low, wake_hi;
  6512. - spin_lock_irqsave(&bank->lock, flags);
  6513. + raw_spin_lock_irqsave(&bank->lock, flags);
  6514. /*
  6515. * Only edges can generate a wakeup event to the PRCM.
  6516. @@ -1299,7 +1347,7 @@
  6517. bank->get_context_loss_count(bank->dev);
  6518. omap_gpio_dbck_disable(bank);
  6519. - spin_unlock_irqrestore(&bank->lock, flags);
  6520. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6521. return 0;
  6522. }
  6523. @@ -1314,14 +1362,14 @@
  6524. unsigned long flags;
  6525. int c;
  6526. - spin_lock_irqsave(&bank->lock, flags);
  6527. + raw_spin_lock_irqsave(&bank->lock, flags);
  6528. /*
  6529. * On the first resume during the probe, the context has not
  6530. * been initialised and so initialise it now. Also initialise
  6531. * the context loss count.
  6532. */
  6533. - if (bank->loses_context && !bank->context_valid) {
  6534. + if (!bank->context_valid) {
  6535. omap_gpio_init_context(bank);
  6536. if (bank->get_context_loss_count)
  6537. @@ -1342,22 +1390,20 @@
  6538. writel_relaxed(bank->context.risingdetect,
  6539. bank->base + bank->regs->risingdetect);
  6540. - if (bank->loses_context) {
  6541. - if (!bank->get_context_loss_count) {
  6542. + if (!bank->get_context_loss_count) {
  6543. + omap_gpio_restore_context(bank);
  6544. + } else {
  6545. + c = bank->get_context_loss_count(bank->dev);
  6546. + if (c != bank->context_loss_count) {
  6547. omap_gpio_restore_context(bank);
  6548. } else {
  6549. - c = bank->get_context_loss_count(bank->dev);
  6550. - if (c != bank->context_loss_count) {
  6551. - omap_gpio_restore_context(bank);
  6552. - } else {
  6553. - spin_unlock_irqrestore(&bank->lock, flags);
  6554. - return 0;
  6555. - }
  6556. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6557. + return 0;
  6558. }
  6559. }
  6560. if (!bank->workaround_enabled) {
  6561. - spin_unlock_irqrestore(&bank->lock, flags);
  6562. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6563. return 0;
  6564. }
  6565. @@ -1412,18 +1458,19 @@
  6566. }
  6567. bank->workaround_enabled = false;
  6568. - spin_unlock_irqrestore(&bank->lock, flags);
  6569. + raw_spin_unlock_irqrestore(&bank->lock, flags);
  6570. return 0;
  6571. }
  6572. #endif /* CONFIG_PM */
  6573. +#if IS_BUILTIN(CONFIG_GPIO_OMAP)
  6574. void omap2_gpio_prepare_for_idle(int pwr_mode)
  6575. {
  6576. struct gpio_bank *bank;
  6577. list_for_each_entry(bank, &omap_gpio_list, node) {
  6578. - if (!BANK_USED(bank) || !bank->loses_context)
  6579. + if (!BANK_USED(bank))
  6580. continue;
  6581. bank->power_mode = pwr_mode;
  6582. @@ -1437,12 +1484,13 @@
  6583. struct gpio_bank *bank;
  6584. list_for_each_entry(bank, &omap_gpio_list, node) {
  6585. - if (!BANK_USED(bank) || !bank->loses_context)
  6586. + if (!BANK_USED(bank))
  6587. continue;
  6588. pm_runtime_get_sync(bank->dev);
  6589. }
  6590. }
  6591. +#endif
  6592. #if defined(CONFIG_PM)
  6593. static void omap_gpio_init_context(struct gpio_bank *p)
  6594. @@ -1598,6 +1646,7 @@
  6595. static struct platform_driver omap_gpio_driver = {
  6596. .probe = omap_gpio_probe,
  6597. + .remove = omap_gpio_remove,
  6598. .driver = {
  6599. .name = "omap_gpio",
  6600. .pm = &gpio_pm_ops,
  6601. @@ -1615,3 +1664,13 @@
  6602. return platform_driver_register(&omap_gpio_driver);
  6603. }
  6604. postcore_initcall(omap_gpio_drv_reg);
  6605. +
  6606. +static void __exit omap_gpio_exit(void)
  6607. +{
  6608. + platform_driver_unregister(&omap_gpio_driver);
  6609. +}
  6610. +module_exit(omap_gpio_exit);
  6611. +
  6612. +MODULE_DESCRIPTION("omap gpio driver");
  6613. +MODULE_ALIAS("platform:gpio-omap");
  6614. +MODULE_LICENSE("GPL v2");
  6615. diff -Nur linux-4.1.39.orig/drivers/gpio/Kconfig linux-4.1.39/drivers/gpio/Kconfig
  6616. --- linux-4.1.39.orig/drivers/gpio/Kconfig 2017-03-13 21:04:36.000000000 +0100
  6617. +++ linux-4.1.39/drivers/gpio/Kconfig 2017-04-18 17:56:30.577395735 +0200
  6618. @@ -309,7 +309,7 @@
  6619. family of SOCs.
  6620. config GPIO_OMAP
  6621. - bool "TI OMAP GPIO support" if COMPILE_TEST && !ARCH_OMAP2PLUS
  6622. + tristate "TI OMAP GPIO support" if ARCH_OMAP2PLUS || COMPILE_TEST
  6623. default y if ARCH_OMAP
  6624. depends on ARM
  6625. select GENERIC_IRQ_CHIP
  6626. diff -Nur linux-4.1.39.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c linux-4.1.39/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  6627. --- linux-4.1.39.orig/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2017-03-13 21:04:36.000000000 +0100
  6628. +++ linux-4.1.39/drivers/gpu/drm/i915/i915_gem_execbuffer.c 2017-04-18 17:56:30.577395735 +0200
  6629. @@ -32,6 +32,7 @@
  6630. #include "i915_trace.h"
  6631. #include "intel_drv.h"
  6632. #include <linux/dma_remapping.h>
  6633. +#include <linux/uaccess.h>
  6634. #define __EXEC_OBJECT_HAS_PIN (1<<31)
  6635. #define __EXEC_OBJECT_HAS_FENCE (1<<30)
  6636. @@ -465,7 +466,7 @@
  6637. }
  6638. /* We can't wait for rendering with pagefaults disabled */
  6639. - if (obj->active && in_atomic())
  6640. + if (obj->active && pagefault_disabled())
  6641. return -EFAULT;
  6642. if (use_cpu_reloc(obj))
  6643. @@ -1338,7 +1339,9 @@
  6644. return ret;
  6645. }
  6646. +#ifndef CONFIG_PREEMPT_RT_BASE
  6647. trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
  6648. +#endif
  6649. i915_gem_execbuffer_move_to_active(vmas, ring);
  6650. i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
  6651. diff -Nur linux-4.1.39.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c linux-4.1.39/drivers/gpu/drm/i915/i915_gem_shrinker.c
  6652. --- linux-4.1.39.orig/drivers/gpu/drm/i915/i915_gem_shrinker.c 2017-03-13 21:04:36.000000000 +0100
  6653. +++ linux-4.1.39/drivers/gpu/drm/i915/i915_gem_shrinker.c 2017-04-18 17:56:30.577395735 +0200
  6654. @@ -39,7 +39,7 @@
  6655. if (!mutex_is_locked(mutex))
  6656. return false;
  6657. -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
  6658. +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE)
  6659. return mutex->owner == task;
  6660. #else
  6661. /* Since UP may be pre-empted, we cannot assume that we own the lock */
  6662. diff -Nur linux-4.1.39.orig/drivers/gpu/drm/i915/i915_irq.c linux-4.1.39/drivers/gpu/drm/i915/i915_irq.c
  6663. --- linux-4.1.39.orig/drivers/gpu/drm/i915/i915_irq.c 2017-03-13 21:04:36.000000000 +0100
  6664. +++ linux-4.1.39/drivers/gpu/drm/i915/i915_irq.c 2017-04-18 17:56:30.581395889 +0200
  6665. @@ -676,6 +676,7 @@
  6666. spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  6667. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  6668. + preempt_disable_rt();
  6669. /* Get optional system timestamp before query. */
  6670. if (stime)
  6671. @@ -727,6 +728,7 @@
  6672. *etime = ktime_get();
  6673. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  6674. + preempt_enable_rt();
  6675. spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
  6676. diff -Nur linux-4.1.39.orig/drivers/gpu/drm/i915/intel_display.c linux-4.1.39/drivers/gpu/drm/i915/intel_display.c
  6677. --- linux-4.1.39.orig/drivers/gpu/drm/i915/intel_display.c 2017-03-13 21:04:36.000000000 +0100
  6678. +++ linux-4.1.39/drivers/gpu/drm/i915/intel_display.c 2017-04-18 17:56:30.581395889 +0200
  6679. @@ -10084,7 +10084,7 @@
  6680. struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
  6681. struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  6682. - WARN_ON(!in_interrupt());
  6683. + WARN_ON_NONRT(!in_interrupt());
  6684. if (crtc == NULL)
  6685. return;
  6686. diff -Nur linux-4.1.39.orig/drivers/gpu/drm/i915/intel_sprite.c linux-4.1.39/drivers/gpu/drm/i915/intel_sprite.c
  6687. --- linux-4.1.39.orig/drivers/gpu/drm/i915/intel_sprite.c 2017-03-13 21:04:36.000000000 +0100
  6688. +++ linux-4.1.39/drivers/gpu/drm/i915/intel_sprite.c 2017-04-18 17:56:30.581395889 +0200
  6689. @@ -37,6 +37,7 @@
  6690. #include "intel_drv.h"
  6691. #include <drm/i915_drm.h>
  6692. #include "i915_drv.h"
  6693. +#include <linux/locallock.h>
  6694. static bool
  6695. format_is_yuv(uint32_t format)
  6696. @@ -61,6 +62,8 @@
  6697. return DIV_ROUND_UP(usecs * mode->crtc_clock, 1000 * mode->crtc_htotal);
  6698. }
  6699. +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock);
  6700. +
  6701. /**
  6702. * intel_pipe_update_start() - start update of a set of display registers
  6703. * @crtc: the crtc of which the registers are going to be updated
  6704. @@ -101,7 +104,7 @@
  6705. if (WARN_ON(drm_crtc_vblank_get(&crtc->base)))
  6706. return false;
  6707. - local_irq_disable();
  6708. + local_lock_irq(pipe_update_lock);
  6709. trace_i915_pipe_update_start(crtc, min, max);
  6710. @@ -123,11 +126,11 @@
  6711. break;
  6712. }
  6713. - local_irq_enable();
  6714. + local_unlock_irq(pipe_update_lock);
  6715. timeout = schedule_timeout(timeout);
  6716. - local_irq_disable();
  6717. + local_lock_irq(pipe_update_lock);
  6718. }
  6719. finish_wait(wq, &wait);
  6720. @@ -158,7 +161,7 @@
  6721. trace_i915_pipe_update_end(crtc, end_vbl_count);
  6722. - local_irq_enable();
  6723. + local_unlock_irq(pipe_update_lock);
  6724. if (start_vbl_count != end_vbl_count)
  6725. DRM_ERROR("Atomic update failure on pipe %c (start=%u end=%u)\n",
  6726. diff -Nur linux-4.1.39.orig/drivers/gpu/drm/radeon/radeon_display.c linux-4.1.39/drivers/gpu/drm/radeon/radeon_display.c
  6727. --- linux-4.1.39.orig/drivers/gpu/drm/radeon/radeon_display.c 2017-03-13 21:04:36.000000000 +0100
  6728. +++ linux-4.1.39/drivers/gpu/drm/radeon/radeon_display.c 2017-04-18 17:56:30.581395889 +0200
  6729. @@ -1798,6 +1798,7 @@
  6730. struct radeon_device *rdev = dev->dev_private;
  6731. /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
  6732. + preempt_disable_rt();
  6733. /* Get optional system timestamp before query. */
  6734. if (stime)
  6735. @@ -1890,6 +1891,7 @@
  6736. *etime = ktime_get();
  6737. /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
  6738. + preempt_enable_rt();
  6739. /* Decode into vertical and horizontal scanout position. */
  6740. *vpos = position & 0x1fff;
  6741. diff -Nur linux-4.1.39.orig/drivers/i2c/busses/i2c-omap.c linux-4.1.39/drivers/i2c/busses/i2c-omap.c
  6742. --- linux-4.1.39.orig/drivers/i2c/busses/i2c-omap.c 2017-03-13 21:04:36.000000000 +0100
  6743. +++ linux-4.1.39/drivers/i2c/busses/i2c-omap.c 2017-04-18 17:56:30.581395889 +0200
  6744. @@ -996,15 +996,12 @@
  6745. u16 mask;
  6746. u16 stat;
  6747. - spin_lock(&dev->lock);
  6748. - mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
  6749. stat = omap_i2c_read_reg(dev, OMAP_I2C_STAT_REG);
  6750. + mask = omap_i2c_read_reg(dev, OMAP_I2C_IE_REG);
  6751. if (stat & mask)
  6752. ret = IRQ_WAKE_THREAD;
  6753. - spin_unlock(&dev->lock);
  6754. -
  6755. return ret;
  6756. }
  6757. diff -Nur linux-4.1.39.orig/drivers/ide/alim15x3.c linux-4.1.39/drivers/ide/alim15x3.c
  6758. --- linux-4.1.39.orig/drivers/ide/alim15x3.c 2017-03-13 21:04:36.000000000 +0100
  6759. +++ linux-4.1.39/drivers/ide/alim15x3.c 2017-04-18 17:56:30.581395889 +0200
  6760. @@ -234,7 +234,7 @@
  6761. isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
  6762. - local_irq_save(flags);
  6763. + local_irq_save_nort(flags);
  6764. if (m5229_revision < 0xC2) {
  6765. /*
  6766. @@ -325,7 +325,7 @@
  6767. }
  6768. pci_dev_put(north);
  6769. pci_dev_put(isa_dev);
  6770. - local_irq_restore(flags);
  6771. + local_irq_restore_nort(flags);
  6772. return 0;
  6773. }
  6774. diff -Nur linux-4.1.39.orig/drivers/ide/hpt366.c linux-4.1.39/drivers/ide/hpt366.c
  6775. --- linux-4.1.39.orig/drivers/ide/hpt366.c 2017-03-13 21:04:36.000000000 +0100
  6776. +++ linux-4.1.39/drivers/ide/hpt366.c 2017-04-18 17:56:30.581395889 +0200
  6777. @@ -1241,7 +1241,7 @@
  6778. dma_old = inb(base + 2);
  6779. - local_irq_save(flags);
  6780. + local_irq_save_nort(flags);
  6781. dma_new = dma_old;
  6782. pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
  6783. @@ -1252,7 +1252,7 @@
  6784. if (dma_new != dma_old)
  6785. outb(dma_new, base + 2);
  6786. - local_irq_restore(flags);
  6787. + local_irq_restore_nort(flags);
  6788. printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
  6789. hwif->name, base, base + 7);
  6790. diff -Nur linux-4.1.39.orig/drivers/ide/ide-io.c linux-4.1.39/drivers/ide/ide-io.c
  6791. --- linux-4.1.39.orig/drivers/ide/ide-io.c 2017-03-13 21:04:36.000000000 +0100
  6792. +++ linux-4.1.39/drivers/ide/ide-io.c 2017-04-18 17:56:30.581395889 +0200
  6793. @@ -659,7 +659,7 @@
  6794. /* disable_irq_nosync ?? */
  6795. disable_irq(hwif->irq);
  6796. /* local CPU only, as if we were handling an interrupt */
  6797. - local_irq_disable();
  6798. + local_irq_disable_nort();
  6799. if (hwif->polling) {
  6800. startstop = handler(drive);
  6801. } else if (drive_is_ready(drive)) {
  6802. diff -Nur linux-4.1.39.orig/drivers/ide/ide-iops.c linux-4.1.39/drivers/ide/ide-iops.c
  6803. --- linux-4.1.39.orig/drivers/ide/ide-iops.c 2017-03-13 21:04:36.000000000 +0100
  6804. +++ linux-4.1.39/drivers/ide/ide-iops.c 2017-04-18 17:56:30.585396045 +0200
  6805. @@ -129,12 +129,12 @@
  6806. if ((stat & ATA_BUSY) == 0)
  6807. break;
  6808. - local_irq_restore(flags);
  6809. + local_irq_restore_nort(flags);
  6810. *rstat = stat;
  6811. return -EBUSY;
  6812. }
  6813. }
  6814. - local_irq_restore(flags);
  6815. + local_irq_restore_nort(flags);
  6816. }
  6817. /*
  6818. * Allow status to settle, then read it again.
  6819. diff -Nur linux-4.1.39.orig/drivers/ide/ide-io-std.c linux-4.1.39/drivers/ide/ide-io-std.c
  6820. --- linux-4.1.39.orig/drivers/ide/ide-io-std.c 2017-03-13 21:04:36.000000000 +0100
  6821. +++ linux-4.1.39/drivers/ide/ide-io-std.c 2017-04-18 17:56:30.581395889 +0200
  6822. @@ -175,7 +175,7 @@
  6823. unsigned long uninitialized_var(flags);
  6824. if ((io_32bit & 2) && !mmio) {
  6825. - local_irq_save(flags);
  6826. + local_irq_save_nort(flags);
  6827. ata_vlb_sync(io_ports->nsect_addr);
  6828. }
  6829. @@ -186,7 +186,7 @@
  6830. insl(data_addr, buf, words);
  6831. if ((io_32bit & 2) && !mmio)
  6832. - local_irq_restore(flags);
  6833. + local_irq_restore_nort(flags);
  6834. if (((len + 1) & 3) < 2)
  6835. return;
  6836. @@ -219,7 +219,7 @@
  6837. unsigned long uninitialized_var(flags);
  6838. if ((io_32bit & 2) && !mmio) {
  6839. - local_irq_save(flags);
  6840. + local_irq_save_nort(flags);
  6841. ata_vlb_sync(io_ports->nsect_addr);
  6842. }
  6843. @@ -230,7 +230,7 @@
  6844. outsl(data_addr, buf, words);
  6845. if ((io_32bit & 2) && !mmio)
  6846. - local_irq_restore(flags);
  6847. + local_irq_restore_nort(flags);
  6848. if (((len + 1) & 3) < 2)
  6849. return;
  6850. diff -Nur linux-4.1.39.orig/drivers/ide/ide-probe.c linux-4.1.39/drivers/ide/ide-probe.c
  6851. --- linux-4.1.39.orig/drivers/ide/ide-probe.c 2017-03-13 21:04:36.000000000 +0100
  6852. +++ linux-4.1.39/drivers/ide/ide-probe.c 2017-04-18 17:56:30.585396045 +0200
  6853. @@ -196,10 +196,10 @@
  6854. int bswap = 1;
  6855. /* local CPU only; some systems need this */
  6856. - local_irq_save(flags);
  6857. + local_irq_save_nort(flags);
  6858. /* read 512 bytes of id info */
  6859. hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
  6860. - local_irq_restore(flags);
  6861. + local_irq_restore_nort(flags);
  6862. drive->dev_flags |= IDE_DFLAG_ID_READ;
  6863. #ifdef DEBUG
  6864. diff -Nur linux-4.1.39.orig/drivers/ide/ide-taskfile.c linux-4.1.39/drivers/ide/ide-taskfile.c
  6865. --- linux-4.1.39.orig/drivers/ide/ide-taskfile.c 2017-03-13 21:04:36.000000000 +0100
  6866. +++ linux-4.1.39/drivers/ide/ide-taskfile.c 2017-04-18 17:56:30.585396045 +0200
  6867. @@ -250,7 +250,7 @@
  6868. page_is_high = PageHighMem(page);
  6869. if (page_is_high)
  6870. - local_irq_save(flags);
  6871. + local_irq_save_nort(flags);
  6872. buf = kmap_atomic(page) + offset;
  6873. @@ -271,7 +271,7 @@
  6874. kunmap_atomic(buf);
  6875. if (page_is_high)
  6876. - local_irq_restore(flags);
  6877. + local_irq_restore_nort(flags);
  6878. len -= nr_bytes;
  6879. }
  6880. @@ -414,7 +414,7 @@
  6881. }
  6882. if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
  6883. - local_irq_disable();
  6884. + local_irq_disable_nort();
  6885. ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
  6886. diff -Nur linux-4.1.39.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c linux-4.1.39/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
  6887. --- linux-4.1.39.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-03-13 21:04:36.000000000 +0100
  6888. +++ linux-4.1.39/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2017-04-18 17:56:30.585396045 +0200
  6889. @@ -824,7 +824,7 @@
  6890. ipoib_dbg_mcast(priv, "restarting multicast task\n");
  6891. - local_irq_save(flags);
  6892. + local_irq_save_nort(flags);
  6893. netif_addr_lock(dev);
  6894. spin_lock(&priv->lock);
  6895. @@ -906,7 +906,7 @@
  6896. spin_unlock(&priv->lock);
  6897. netif_addr_unlock(dev);
  6898. - local_irq_restore(flags);
  6899. + local_irq_restore_nort(flags);
  6900. /*
  6901. * make sure the in-flight joins have finished before we attempt
  6902. diff -Nur linux-4.1.39.orig/drivers/input/gameport/gameport.c linux-4.1.39/drivers/input/gameport/gameport.c
  6903. --- linux-4.1.39.orig/drivers/input/gameport/gameport.c 2017-03-13 21:04:36.000000000 +0100
  6904. +++ linux-4.1.39/drivers/input/gameport/gameport.c 2017-04-18 17:56:30.585396045 +0200
  6905. @@ -124,12 +124,12 @@
  6906. tx = 1 << 30;
  6907. for(i = 0; i < 50; i++) {
  6908. - local_irq_save(flags);
  6909. + local_irq_save_nort(flags);
  6910. GET_TIME(t1);
  6911. for (t = 0; t < 50; t++) gameport_read(gameport);
  6912. GET_TIME(t2);
  6913. GET_TIME(t3);
  6914. - local_irq_restore(flags);
  6915. + local_irq_restore_nort(flags);
  6916. udelay(i * 10);
  6917. if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
  6918. }
  6919. @@ -148,11 +148,11 @@
  6920. tx = 1 << 30;
  6921. for(i = 0; i < 50; i++) {
  6922. - local_irq_save(flags);
  6923. + local_irq_save_nort(flags);
  6924. rdtscl(t1);
  6925. for (t = 0; t < 50; t++) gameport_read(gameport);
  6926. rdtscl(t2);
  6927. - local_irq_restore(flags);
  6928. + local_irq_restore_nort(flags);
  6929. udelay(i * 10);
  6930. if (t2 - t1 < tx) tx = t2 - t1;
  6931. }
  6932. diff -Nur linux-4.1.39.orig/drivers/leds/trigger/Kconfig linux-4.1.39/drivers/leds/trigger/Kconfig
  6933. --- linux-4.1.39.orig/drivers/leds/trigger/Kconfig 2017-03-13 21:04:36.000000000 +0100
  6934. +++ linux-4.1.39/drivers/leds/trigger/Kconfig 2017-04-18 17:56:30.585396045 +0200
  6935. @@ -61,7 +61,7 @@
  6936. config LEDS_TRIGGER_CPU
  6937. bool "LED CPU Trigger"
  6938. - depends on LEDS_TRIGGERS
  6939. + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE
  6940. help
  6941. This allows LEDs to be controlled by active CPUs. This shows
  6942. the active CPUs across an array of LEDs so you can see which
  6943. diff -Nur linux-4.1.39.orig/drivers/md/bcache/Kconfig linux-4.1.39/drivers/md/bcache/Kconfig
  6944. --- linux-4.1.39.orig/drivers/md/bcache/Kconfig 2017-03-13 21:04:36.000000000 +0100
  6945. +++ linux-4.1.39/drivers/md/bcache/Kconfig 2017-04-18 17:56:30.585396045 +0200
  6946. @@ -1,6 +1,7 @@
  6947. config BCACHE
  6948. tristate "Block device as cache"
  6949. + depends on !PREEMPT_RT_FULL
  6950. ---help---
  6951. Allows a block device to be used as cache for other devices; uses
  6952. a btree for indexing and the layout is optimized for SSDs.
  6953. diff -Nur linux-4.1.39.orig/drivers/md/dm.c linux-4.1.39/drivers/md/dm.c
  6954. --- linux-4.1.39.orig/drivers/md/dm.c 2017-03-13 21:04:36.000000000 +0100
  6955. +++ linux-4.1.39/drivers/md/dm.c 2017-04-18 17:56:30.585396045 +0200
  6956. @@ -2141,7 +2141,7 @@
  6957. /* Establish tio->ti before queuing work (map_tio_request) */
  6958. tio->ti = ti;
  6959. queue_kthread_work(&md->kworker, &tio->work);
  6960. - BUG_ON(!irqs_disabled());
  6961. + BUG_ON_NONRT(!irqs_disabled());
  6962. }
  6963. goto out;
  6964. diff -Nur linux-4.1.39.orig/drivers/md/raid5.c linux-4.1.39/drivers/md/raid5.c
  6965. --- linux-4.1.39.orig/drivers/md/raid5.c 2017-03-13 21:04:36.000000000 +0100
  6966. +++ linux-4.1.39/drivers/md/raid5.c 2017-04-18 17:56:30.585396045 +0200
  6967. @@ -1918,8 +1918,9 @@
  6968. struct raid5_percpu *percpu;
  6969. unsigned long cpu;
  6970. - cpu = get_cpu();
  6971. + cpu = get_cpu_light();
  6972. percpu = per_cpu_ptr(conf->percpu, cpu);
  6973. + spin_lock(&percpu->lock);
  6974. if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
  6975. ops_run_biofill(sh);
  6976. overlap_clear++;
  6977. @@ -1975,7 +1976,8 @@
  6978. if (test_and_clear_bit(R5_Overlap, &dev->flags))
  6979. wake_up(&sh->raid_conf->wait_for_overlap);
  6980. }
  6981. - put_cpu();
  6982. + spin_unlock(&percpu->lock);
  6983. + put_cpu_light();
  6984. }
  6985. static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
  6986. @@ -6375,6 +6377,7 @@
  6987. __func__, cpu);
  6988. break;
  6989. }
  6990. + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
  6991. }
  6992. put_online_cpus();
  6993. diff -Nur linux-4.1.39.orig/drivers/md/raid5.h linux-4.1.39/drivers/md/raid5.h
  6994. --- linux-4.1.39.orig/drivers/md/raid5.h 2017-03-13 21:04:36.000000000 +0100
  6995. +++ linux-4.1.39/drivers/md/raid5.h 2017-04-18 17:56:30.585396045 +0200
  6996. @@ -495,6 +495,7 @@
  6997. int recovery_disabled;
  6998. /* per cpu variables */
  6999. struct raid5_percpu {
  7000. + spinlock_t lock; /* Protection for -RT */
  7001. struct page *spare_page; /* Used when checking P/Q in raid6 */
  7002. struct flex_array *scribble; /* space for constructing buffer
  7003. * lists and performing address
  7004. diff -Nur linux-4.1.39.orig/drivers/misc/hwlat_detector.c linux-4.1.39/drivers/misc/hwlat_detector.c
  7005. --- linux-4.1.39.orig/drivers/misc/hwlat_detector.c 1970-01-01 01:00:00.000000000 +0100
  7006. +++ linux-4.1.39/drivers/misc/hwlat_detector.c 2017-04-18 17:56:30.585396045 +0200
  7007. @@ -0,0 +1,1240 @@
  7008. +/*
  7009. + * hwlat_detector.c - A simple Hardware Latency detector.
  7010. + *
  7011. + * Use this module to detect large system latencies induced by the behavior of
  7012. + * certain underlying system hardware or firmware, independent of Linux itself.
  7013. + * The code was developed originally to detect the presence of SMIs on Intel
  7014. + * and AMD systems, although there is no dependency upon x86 herein.
  7015. + *
  7016. + * The classical example usage of this module is in detecting the presence of
  7017. + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
  7018. + * somewhat special form of hardware interrupt spawned from earlier CPU debug
  7019. + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
  7020. + * LPC (or other device) to generate a special interrupt under certain
  7021. + * circumstances, for example, upon expiration of a special SMI timer device,
  7022. + * due to certain external thermal readings, on certain I/O address accesses,
  7023. + * and other situations. An SMI hits a special CPU pin, triggers a special
  7024. + * SMI mode (complete with special memory map), and the OS is unaware.
  7025. + *
  7026. + * Although certain hardware-inducing latencies are necessary (for example,
  7027. + * a modern system often requires an SMI handler for correct thermal control
  7028. + * and remote management) they can wreak havoc upon any OS-level performance
  7029. + * guarantees toward low-latency, especially when the OS is not even made
  7030. + * aware of the presence of these interrupts. For this reason, we need a
  7031. + * somewhat brute force mechanism to detect these interrupts. In this case,
  7032. + * we do it by hogging all of the CPU(s) for configurable timer intervals,
  7033. + * sampling the built-in CPU timer, looking for discontiguous readings.
  7034. + *
  7035. + * WARNING: This implementation necessarily introduces latencies. Therefore,
  7036. + * you should NEVER use this module in a production environment
  7037. + * requiring any kind of low-latency performance guarantee(s).
  7038. + *
  7039. + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
  7040. + *
  7041. + * Includes useful feedback from Clark Williams <clark@redhat.com>
  7042. + *
  7043. + * This file is licensed under the terms of the GNU General Public
  7044. + * License version 2. This program is licensed "as is" without any
  7045. + * warranty of any kind, whether express or implied.
  7046. + */
  7047. +
  7048. +#include <linux/module.h>
  7049. +#include <linux/init.h>
  7050. +#include <linux/ring_buffer.h>
  7051. +#include <linux/time.h>
  7052. +#include <linux/hrtimer.h>
  7053. +#include <linux/kthread.h>
  7054. +#include <linux/debugfs.h>
  7055. +#include <linux/seq_file.h>
  7056. +#include <linux/uaccess.h>
  7057. +#include <linux/version.h>
  7058. +#include <linux/delay.h>
  7059. +#include <linux/slab.h>
  7060. +#include <linux/trace_clock.h>
  7061. +
  7062. +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */
  7063. +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */
  7064. +#define U64STR_SIZE 22 /* 20 digits max */
  7065. +
  7066. +#define VERSION "1.0.0"
  7067. +#define BANNER "hwlat_detector: "
  7068. +#define DRVNAME "hwlat_detector"
  7069. +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */
  7070. +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */
  7071. +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */
  7072. +
  7073. +/* Module metadata */
  7074. +
  7075. +MODULE_LICENSE("GPL");
  7076. +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
  7077. +MODULE_DESCRIPTION("A simple hardware latency detector");
  7078. +MODULE_VERSION(VERSION);
  7079. +
  7080. +/* Module parameters */
  7081. +
  7082. +static int debug;
  7083. +static int enabled;
  7084. +static int threshold;
  7085. +
  7086. +module_param(debug, int, 0); /* enable debug */
  7087. +module_param(enabled, int, 0); /* enable detector */
  7088. +module_param(threshold, int, 0); /* latency threshold */
  7089. +
  7090. +/* Buffering and sampling */
  7091. +
  7092. +static struct ring_buffer *ring_buffer; /* sample buffer */
  7093. +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */
  7094. +static unsigned long buf_size = BUF_SIZE_DEFAULT;
  7095. +static struct task_struct *kthread; /* sampling thread */
  7096. +
  7097. +/* DebugFS filesystem entries */
  7098. +
  7099. +static struct dentry *debug_dir; /* debugfs directory */
  7100. +static struct dentry *debug_max; /* maximum TSC delta */
  7101. +static struct dentry *debug_count; /* total detect count */
  7102. +static struct dentry *debug_sample_width; /* sample width us */
  7103. +static struct dentry *debug_sample_window; /* sample window us */
  7104. +static struct dentry *debug_sample; /* raw samples us */
  7105. +static struct dentry *debug_threshold; /* threshold us */
  7106. +static struct dentry *debug_enable; /* enable/disable */
  7107. +
  7108. +/* Individual samples and global state */
  7109. +
  7110. +struct sample; /* latency sample */
  7111. +struct data; /* Global state */
  7112. +
  7113. +/* Sampling functions */
  7114. +static int __buffer_add_sample(struct sample *sample);
  7115. +static struct sample *buffer_get_sample(struct sample *sample);
  7116. +
  7117. +/* Threading and state */
  7118. +static int kthread_fn(void *unused);
  7119. +static int start_kthread(void);
  7120. +static int stop_kthread(void);
  7121. +static void __reset_stats(void);
  7122. +static int init_stats(void);
  7123. +
  7124. +/* Debugfs interface */
  7125. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  7126. + size_t cnt, loff_t *ppos, const u64 *entry);
  7127. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  7128. + size_t cnt, loff_t *ppos, u64 *entry);
  7129. +static int debug_sample_fopen(struct inode *inode, struct file *filp);
  7130. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  7131. + size_t cnt, loff_t *ppos);
  7132. +static int debug_sample_release(struct inode *inode, struct file *filp);
  7133. +static int debug_enable_fopen(struct inode *inode, struct file *filp);
  7134. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  7135. + size_t cnt, loff_t *ppos);
  7136. +static ssize_t debug_enable_fwrite(struct file *file,
  7137. + const char __user *user_buffer,
  7138. + size_t user_size, loff_t *offset);
  7139. +
  7140. +/* Initialization functions */
  7141. +static int init_debugfs(void);
  7142. +static void free_debugfs(void);
  7143. +static int detector_init(void);
  7144. +static void detector_exit(void);
  7145. +
  7146. +/* Individual latency samples are stored here when detected and packed into
  7147. + * the ring_buffer circular buffer, where they are overwritten when
  7148. + * more than buf_size/sizeof(sample) samples are received. */
  7149. +struct sample {
  7150. + u64 seqnum; /* unique sequence */
  7151. + u64 duration; /* ktime delta */
  7152. + u64 outer_duration; /* ktime delta (outer loop) */
  7153. + struct timespec timestamp; /* wall time */
  7154. + unsigned long lost;
  7155. +};
  7156. +
  7157. +/* keep the global state somewhere. */
  7158. +static struct data {
  7159. +
  7160. + struct mutex lock; /* protect changes */
  7161. +
  7162. + u64 count; /* total since reset */
  7163. + u64 max_sample; /* max hardware latency */
  7164. + u64 threshold; /* sample threshold level */
  7165. +
  7166. + u64 sample_window; /* total sampling window (on+off) */
  7167. + u64 sample_width; /* active sampling portion of window */
  7168. +
  7169. + atomic_t sample_open; /* whether the sample file is open */
  7170. +
7171. + wait_queue_head_t wq; /* waitqueue for new sample values */
  7172. +
  7173. +} data;
  7174. +
  7175. +/**
  7176. + * __buffer_add_sample - add a new latency sample recording to the ring buffer
  7177. + * @sample: The new latency sample value
  7178. + *
  7179. + * This receives a new latency sample and records it in a global ring buffer.
  7180. + * No additional locking is used in this case.
  7181. + */
  7182. +static int __buffer_add_sample(struct sample *sample)
  7183. +{
  7184. + return ring_buffer_write(ring_buffer,
  7185. + sizeof(struct sample), sample);
  7186. +}
  7187. +
  7188. +/**
  7189. + * buffer_get_sample - remove a hardware latency sample from the ring buffer
  7190. + * @sample: Pre-allocated storage for the sample
  7191. + *
  7192. + * This retrieves a hardware latency sample from the global circular buffer
  7193. + */
  7194. +static struct sample *buffer_get_sample(struct sample *sample)
  7195. +{
  7196. + struct ring_buffer_event *e = NULL;
  7197. + struct sample *s = NULL;
  7198. + unsigned int cpu = 0;
  7199. +
  7200. + if (!sample)
  7201. + return NULL;
  7202. +
  7203. + mutex_lock(&ring_buffer_mutex);
  7204. + for_each_online_cpu(cpu) {
  7205. + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost);
  7206. + if (e)
  7207. + break;
  7208. + }
  7209. +
  7210. + if (e) {
  7211. + s = ring_buffer_event_data(e);
  7212. + memcpy(sample, s, sizeof(struct sample));
  7213. + } else
  7214. + sample = NULL;
  7215. + mutex_unlock(&ring_buffer_mutex);
  7216. +
  7217. + return sample;
  7218. +}
  7219. +
  7220. +#ifndef CONFIG_TRACING
  7221. +#define time_type ktime_t
  7222. +#define time_get() ktime_get()
  7223. +#define time_to_us(x) ktime_to_us(x)
  7224. +#define time_sub(a, b) ktime_sub(a, b)
  7225. +#define init_time(a, b) (a).tv64 = b
  7226. +#define time_u64(a) ((a).tv64)
  7227. +#else
  7228. +#define time_type u64
  7229. +#define time_get() trace_clock_local()
  7230. +#define time_to_us(x) div_u64(x, 1000)
  7231. +#define time_sub(a, b) ((a) - (b))
  7232. +#define init_time(a, b) (a = b)
  7233. +#define time_u64(a) a
  7234. +#endif
  7235. +/**
  7236. + * get_sample - sample the CPU TSC and look for likely hardware latencies
  7237. + *
  7238. + * Used to repeatedly capture the CPU TSC (or similar), looking for potential
  7239. + * hardware-induced latency. Called with interrupts disabled and with
  7240. + * data.lock held.
  7241. + */
  7242. +static int get_sample(void)
  7243. +{
  7244. + time_type start, t1, t2, last_t2;
  7245. + s64 diff, total = 0;
  7246. + u64 sample = 0;
  7247. + u64 outer_sample = 0;
  7248. + int ret = -1;
  7249. +
  7250. + init_time(last_t2, 0);
  7251. + start = time_get(); /* start timestamp */
  7252. +
  7253. + do {
  7254. +
  7255. + t1 = time_get(); /* we'll look for a discontinuity */
  7256. + t2 = time_get();
  7257. +
  7258. + if (time_u64(last_t2)) {
  7259. + /* Check the delta from outer loop (t2 to next t1) */
  7260. + diff = time_to_us(time_sub(t1, last_t2));
  7261. + /* This shouldn't happen */
  7262. + if (diff < 0) {
  7263. + pr_err(BANNER "time running backwards\n");
  7264. + goto out;
  7265. + }
  7266. + if (diff > outer_sample)
  7267. + outer_sample = diff;
  7268. + }
  7269. + last_t2 = t2;
  7270. +
  7271. + total = time_to_us(time_sub(t2, start)); /* sample width */
  7272. +
  7273. + /* This checks the inner loop (t1 to t2) */
  7274. + diff = time_to_us(time_sub(t2, t1)); /* current diff */
  7275. +
  7276. + /* This shouldn't happen */
  7277. + if (diff < 0) {
  7278. + pr_err(BANNER "time running backwards\n");
  7279. + goto out;
  7280. + }
  7281. +
  7282. + if (diff > sample)
  7283. + sample = diff; /* only want highest value */
  7284. +
  7285. + } while (total <= data.sample_width);
  7286. +
  7287. + ret = 0;
  7288. +
  7289. + /* If we exceed the threshold value, we have found a hardware latency */
  7290. + if (sample > data.threshold || outer_sample > data.threshold) {
  7291. + struct sample s;
  7292. +
  7293. + ret = 1;
  7294. +
  7295. + data.count++;
  7296. + s.seqnum = data.count;
  7297. + s.duration = sample;
  7298. + s.outer_duration = outer_sample;
  7299. + s.timestamp = CURRENT_TIME;
  7300. + __buffer_add_sample(&s);
  7301. +
  7302. + /* Keep a running maximum ever recorded hardware latency */
  7303. + if (sample > data.max_sample)
  7304. + data.max_sample = sample;
  7305. + }
  7306. +
  7307. +out:
  7308. + return ret;
  7309. +}
  7310. +
  7311. +/*
  7312. + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
  7313. + * @unused: A required part of the kthread API.
  7314. + *
  7315. + * Used to periodically sample the CPU TSC via a call to get_sample. We
  7316. + * disable interrupts, which does (intentionally) introduce latency since we
  7317. + * need to ensure nothing else might be running (and thus pre-empting).
  7318. + * Obviously this should never be used in production environments.
  7319. + *
7320. + * Currently this runs on whichever CPU it was scheduled on, but most
7321. + * real-world hardware latency situations occur across several CPUs,
7322. + * so we might later generalize this if we find there are any actual
7323. + * systems with alternate SMI delivery or other hardware latencies.
  7324. + */
  7325. +static int kthread_fn(void *unused)
  7326. +{
  7327. + int ret;
  7328. + u64 interval;
  7329. +
  7330. + while (!kthread_should_stop()) {
  7331. +
  7332. + mutex_lock(&data.lock);
  7333. +
  7334. + local_irq_disable();
  7335. + ret = get_sample();
  7336. + local_irq_enable();
  7337. +
  7338. + if (ret > 0)
  7339. + wake_up(&data.wq); /* wake up reader(s) */
  7340. +
  7341. + interval = data.sample_window - data.sample_width;
  7342. + do_div(interval, USEC_PER_MSEC); /* modifies interval value */
  7343. +
  7344. + mutex_unlock(&data.lock);
  7345. +
  7346. + if (msleep_interruptible(interval))
  7347. + break;
  7348. + }
  7349. +
  7350. + return 0;
  7351. +}
  7352. +
  7353. +/**
  7354. + * start_kthread - Kick off the hardware latency sampling/detector kthread
  7355. + *
  7356. + * This starts a kernel thread that will sit and sample the CPU timestamp
  7357. + * counter (TSC or similar) and look for potential hardware latencies.
  7358. + */
  7359. +static int start_kthread(void)
  7360. +{
  7361. + kthread = kthread_run(kthread_fn, NULL,
  7362. + DRVNAME);
  7363. + if (IS_ERR(kthread)) {
  7364. + pr_err(BANNER "could not start sampling thread\n");
  7365. + enabled = 0;
  7366. + return -ENOMEM;
  7367. + }
  7368. +
  7369. + return 0;
  7370. +}
  7371. +
  7372. +/**
7373. + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
  7374. + *
  7375. + * This kicks the running hardware latency sampling/detector kernel thread and
  7376. + * tells it to stop sampling now. Use this on unload and at system shutdown.
  7377. + */
  7378. +static int stop_kthread(void)
  7379. +{
  7380. + int ret;
  7381. +
  7382. + ret = kthread_stop(kthread);
  7383. +
  7384. + return ret;
  7385. +}
  7386. +
  7387. +/**
  7388. + * __reset_stats - Reset statistics for the hardware latency detector
  7389. + *
  7390. + * We use data to store various statistics and global state. We call this
  7391. + * function in order to reset those when "enable" is toggled on or off, and
  7392. + * also at initialization. Should be called with data.lock held.
  7393. + */
  7394. +static void __reset_stats(void)
  7395. +{
  7396. + data.count = 0;
  7397. + data.max_sample = 0;
  7398. + ring_buffer_reset(ring_buffer); /* flush out old sample entries */
  7399. +}
  7400. +
  7401. +/**
  7402. + * init_stats - Setup global state statistics for the hardware latency detector
  7403. + *
  7404. + * We use data to store various statistics and global state. We also use
  7405. + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
  7406. + * induced system latencies. This function initializes these structures and
  7407. + * allocates the global ring buffer also.
  7408. + */
  7409. +static int init_stats(void)
  7410. +{
  7411. + int ret = -ENOMEM;
  7412. +
  7413. + mutex_init(&data.lock);
  7414. + init_waitqueue_head(&data.wq);
  7415. + atomic_set(&data.sample_open, 0);
  7416. +
  7417. + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);
  7418. +
  7419. + if (WARN(!ring_buffer, KERN_ERR BANNER
  7420. + "failed to allocate ring buffer!\n"))
  7421. + goto out;
  7422. +
  7423. + __reset_stats();
  7424. + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
  7425. + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
  7426. + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */
  7427. +
  7428. + ret = 0;
  7429. +
  7430. +out:
  7431. + return ret;
  7432. +
  7433. +}
  7434. +
  7435. +/*
  7436. + * simple_data_read - Wrapper read function for global state debugfs entries
  7437. + * @filp: The active open file structure for the debugfs "file"
  7438. + * @ubuf: The userspace provided buffer to read value into
  7439. + * @cnt: The maximum number of bytes to read
  7440. + * @ppos: The current "file" position
  7441. + * @entry: The entry to read from
  7442. + *
  7443. + * This function provides a generic read implementation for the global state
  7444. + * "data" structure debugfs filesystem entries. It would be nice to use
  7445. + * simple_attr_read directly, but we need to make sure that the data.lock
  7446. + * is held during the actual read.
  7447. + */
  7448. +static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  7449. + size_t cnt, loff_t *ppos, const u64 *entry)
  7450. +{
  7451. + char buf[U64STR_SIZE];
  7452. + u64 val = 0;
  7453. + int len = 0;
  7454. +
  7455. + memset(buf, 0, sizeof(buf));
  7456. +
  7457. + if (!entry)
  7458. + return -EFAULT;
  7459. +
  7460. + mutex_lock(&data.lock);
  7461. + val = *entry;
  7462. + mutex_unlock(&data.lock);
  7463. +
  7464. + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);
  7465. +
  7466. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
  7467. +
  7468. +}
  7469. +
  7470. +/*
  7471. + * simple_data_write - Wrapper write function for global state debugfs entries
  7472. + * @filp: The active open file structure for the debugfs "file"
  7473. + * @ubuf: The userspace provided buffer to write value from
  7474. + * @cnt: The maximum number of bytes to write
  7475. + * @ppos: The current "file" position
  7476. + * @entry: The entry to write to
  7477. + *
  7478. + * This function provides a generic write implementation for the global state
  7479. + * "data" structure debugfs filesystem entries. It would be nice to use
  7480. + * simple_attr_write directly, but we need to make sure that the data.lock
  7481. + * is held during the actual write.
  7482. + */
  7483. +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
  7484. + size_t cnt, loff_t *ppos, u64 *entry)
  7485. +{
  7486. + char buf[U64STR_SIZE];
  7487. + int csize = min(cnt, sizeof(buf));
  7488. + u64 val = 0;
  7489. + int err = 0;
  7490. +
  7491. + memset(buf, '\0', sizeof(buf));
  7492. + if (copy_from_user(buf, ubuf, csize))
  7493. + return -EFAULT;
  7494. +
  7495. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  7496. + err = kstrtoull(buf, 10, &val);
  7497. + if (err)
  7498. + return -EINVAL;
  7499. +
  7500. + mutex_lock(&data.lock);
  7501. + *entry = val;
  7502. + mutex_unlock(&data.lock);
  7503. +
  7504. + return csize;
  7505. +}
  7506. +
  7507. +/**
  7508. + * debug_count_fopen - Open function for "count" debugfs entry
  7509. + * @inode: The in-kernel inode representation of the debugfs "file"
  7510. + * @filp: The active open file structure for the debugfs "file"
  7511. + *
  7512. + * This function provides an open implementation for the "count" debugfs
  7513. + * interface to the hardware latency detector.
  7514. + */
  7515. +static int debug_count_fopen(struct inode *inode, struct file *filp)
  7516. +{
  7517. + return 0;
  7518. +}
  7519. +
  7520. +/**
  7521. + * debug_count_fread - Read function for "count" debugfs entry
  7522. + * @filp: The active open file structure for the debugfs "file"
  7523. + * @ubuf: The userspace provided buffer to read value into
  7524. + * @cnt: The maximum number of bytes to read
  7525. + * @ppos: The current "file" position
  7526. + *
  7527. + * This function provides a read implementation for the "count" debugfs
  7528. + * interface to the hardware latency detector. Can be used to read the
  7529. + * number of latency readings exceeding the configured threshold since
  7530. + * the detector was last reset (e.g. by writing a zero into "count").
  7531. + */
  7532. +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
  7533. + size_t cnt, loff_t *ppos)
  7534. +{
  7535. + return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
  7536. +}
  7537. +
  7538. +/**
  7539. + * debug_count_fwrite - Write function for "count" debugfs entry
  7540. + * @filp: The active open file structure for the debugfs "file"
  7541. + * @ubuf: The user buffer that contains the value to write
  7542. + * @cnt: The maximum number of bytes to write to "file"
  7543. + * @ppos: The current position in the debugfs "file"
  7544. + *
  7545. + * This function provides a write implementation for the "count" debugfs
  7546. + * interface to the hardware latency detector. Can be used to write a
  7547. + * desired value, especially to zero the total count.
  7548. + */
  7549. +static ssize_t debug_count_fwrite(struct file *filp,
  7550. + const char __user *ubuf,
  7551. + size_t cnt,
  7552. + loff_t *ppos)
  7553. +{
  7554. + return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
  7555. +}
  7556. +
  7557. +/**
  7558. + * debug_enable_fopen - Dummy open function for "enable" debugfs interface
  7559. + * @inode: The in-kernel inode representation of the debugfs "file"
  7560. + * @filp: The active open file structure for the debugfs "file"
  7561. + *
  7562. + * This function provides an open implementation for the "enable" debugfs
  7563. + * interface to the hardware latency detector.
  7564. + */
  7565. +static int debug_enable_fopen(struct inode *inode, struct file *filp)
  7566. +{
  7567. + return 0;
  7568. +}
  7569. +
  7570. +/**
  7571. + * debug_enable_fread - Read function for "enable" debugfs interface
  7572. + * @filp: The active open file structure for the debugfs "file"
  7573. + * @ubuf: The userspace provided buffer to read value into
  7574. + * @cnt: The maximum number of bytes to read
  7575. + * @ppos: The current "file" position
  7576. + *
  7577. + * This function provides a read implementation for the "enable" debugfs
  7578. + * interface to the hardware latency detector. Can be used to determine
  7579. + * whether the detector is currently enabled ("0\n" or "1\n" returned).
  7580. + */
  7581. +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
  7582. + size_t cnt, loff_t *ppos)
  7583. +{
  7584. + char buf[4];
  7585. +
  7586. + if ((cnt < sizeof(buf)) || (*ppos))
  7587. + return 0;
  7588. +
  7589. + buf[0] = enabled ? '1' : '0';
  7590. + buf[1] = '\n';
  7591. + buf[2] = '\0';
  7592. + if (copy_to_user(ubuf, buf, strlen(buf)))
  7593. + return -EFAULT;
  7594. + return *ppos = strlen(buf);
  7595. +}
  7596. +
  7597. +/**
  7598. + * debug_enable_fwrite - Write function for "enable" debugfs interface
  7599. + * @filp: The active open file structure for the debugfs "file"
  7600. + * @ubuf: The user buffer that contains the value to write
  7601. + * @cnt: The maximum number of bytes to write to "file"
  7602. + * @ppos: The current position in the debugfs "file"
  7603. + *
  7604. + * This function provides a write implementation for the "enable" debugfs
  7605. + * interface to the hardware latency detector. Can be used to enable or
  7606. + * disable the detector. Enabling resets the global stats and kicks off
  7607. + * the measuring kthread; disabling stops the kthread and wakes up any
  7608. + * readers blocked on the sample buffer.
  7609. + */
  7610. +static ssize_t debug_enable_fwrite(struct file *filp,
  7611. + const char __user *ubuf,
  7612. + size_t cnt,
  7613. + loff_t *ppos)
  7614. +{
  7615. + char buf[4];
  7616. + int csize = min(cnt, sizeof(buf));
  7617. + long val = 0;
  7618. + int err = 0;
  7619. +
  7620. + memset(buf, '\0', sizeof(buf));
  7621. + if (copy_from_user(buf, ubuf, csize))
  7622. + return -EFAULT;
  7623. +
  7624. + buf[sizeof(buf)-1] = '\0'; /* just in case */
  7625. + err = kstrtoul(buf, 10, &val);
  7626. + if (0 != err)
  7627. + return -EINVAL;
  7628. +
  7629. + if (val) {
  7630. + if (enabled)
  7631. + goto unlock;
  7632. + enabled = 1;
  7633. + __reset_stats();
  7634. + if (start_kthread())
  7635. + return -EFAULT;
  7636. + } else {
  7637. + if (!enabled)
  7638. + goto unlock;
  7639. + enabled = 0;
  7640. + err = stop_kthread();
  7641. + if (err) {
  7642. + pr_err(BANNER "cannot stop kthread\n");
  7643. + return -EFAULT;
  7644. + }
  7645. + wake_up(&data.wq); /* reader(s) should return */
  7646. + }
  7647. +unlock:
  7648. + return csize;
  7649. +}
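A hedged sketch of driving these enable handlers from userspace (the debugfs path below is an assumption about where debugfs is mounted) could look like the following; writing "1" starts the sampling kthread and writing "0" stops it, mirroring the logic above.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_enabled(int on)
{
    /* Assumed mount point; the directory name comes from DRVNAME. */
    int fd = open("/sys/kernel/debug/hwlat_detector/enable", O_WRONLY);

    if (fd < 0)
        return -1;
    /* debug_enable_fwrite() accepts a decimal 0 or 1. */
    if (write(fd, on ? "1" : "0", 1) != 1) {
        close(fd);
        return -1;
    }
    return close(fd);
}

int main(void)
{
    if (set_enabled(1))
        perror("enable");
    sleep(10);          /* let the detector sample for a while */
    if (set_enabled(0))
        perror("disable");
    return 0;
}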
  7650. +
  7651. +/**
  7652. + * debug_max_fopen - Open function for "max" debugfs entry
  7653. + * @inode: The in-kernel inode representation of the debugfs "file"
  7654. + * @filp: The active open file structure for the debugfs "file"
  7655. + *
  7656. + * This function provides an open implementation for the "max" debugfs
  7657. + * interface to the hardware latency detector.
  7658. + */
  7659. +static int debug_max_fopen(struct inode *inode, struct file *filp)
  7660. +{
  7661. + return 0;
  7662. +}
  7663. +
  7664. +/**
  7665. + * debug_max_fread - Read function for "max" debugfs entry
  7666. + * @filp: The active open file structure for the debugfs "file"
  7667. + * @ubuf: The userspace provided buffer to read value into
  7668. + * @cnt: The maximum number of bytes to read
  7669. + * @ppos: The current "file" position
  7670. + *
  7671. + * This function provides a read implementation for the "max" debugfs
  7672. + * interface to the hardware latency detector. Can be used to determine
  7673. + * the maximum latency value observed since it was last reset.
  7674. + */
  7675. +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
  7676. + size_t cnt, loff_t *ppos)
  7677. +{
  7678. + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
  7679. +}
  7680. +
  7681. +/**
  7682. + * debug_max_fwrite - Write function for "max" debugfs entry
  7683. + * @filp: The active open file structure for the debugfs "file"
  7684. + * @ubuf: The user buffer that contains the value to write
  7685. + * @cnt: The maximum number of bytes to write to "file"
  7686. + * @ppos: The current position in the debugfs "file"
  7687. + *
  7688. + * This function provides a write implementation for the "max" debugfs
  7689. + * interface to the hardware latency detector. Can be used to reset the
  7690. + * maximum or set it to some other desired value; if subsequent
  7691. + * measurements then exceed this value, the maximum will be updated.
  7692. + */
  7693. +static ssize_t debug_max_fwrite(struct file *filp,
  7694. + const char __user *ubuf,
  7695. + size_t cnt,
  7696. + loff_t *ppos)
  7697. +{
  7698. + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
  7699. +}
  7700. +
  7701. +
  7702. +/**
  7703. + * debug_sample_fopen - An open function for "sample" debugfs interface
  7704. + * @inode: The in-kernel inode representation of this debugfs "file"
  7705. + * @filp: The active open file structure for the debugfs "file"
  7706. + *
  7707. + * This function handles opening the "sample" file within the hardware
  7708. + * latency detector debugfs directory interface. This file is used to read
  7709. + * raw samples from the global ring_buffer and allows the user to see a
  7710. + * running latency history. Can be opened blocking or non-blocking,
  7711. + * which determines whether reads block for new samples or return at once.
  7712. + * Implements simple locking to prevent multiple simultaneous use.
  7713. + */
  7714. +static int debug_sample_fopen(struct inode *inode, struct file *filp)
  7715. +{
  7716. + if (!atomic_add_unless(&data.sample_open, 1, 1))
  7717. + return -EBUSY;
  7718. + else
  7719. + return 0;
  7720. +}
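The atomic_add_unless() above implements a simple single-opener lock: the first open bumps sample_open to 1 and succeeds, and any further open fails until the release path drops the count again. A minimal userspace sketch (hypothetical path, assuming debugfs is mounted at /sys/kernel/debug) that demonstrates the -EBUSY behaviour:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    const char *path = "/sys/kernel/debug/hwlat_detector/sample";
    int first = open(path, O_RDONLY | O_NONBLOCK);
    int second = open(path, O_RDONLY | O_NONBLOCK);

    /* The second open should fail with EBUSY while the first is held. */
    printf("first fd: %d, second fd: %d (%s)\n",
           first, second, second < 0 ? strerror(errno) : "ok");

    if (first >= 0)
        close(first);
    if (second >= 0)
        close(second);
    return 0;
}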
  7721. +
  7722. +/**
  7723. + * debug_sample_fread - A read function for "sample" debugfs interface
  7724. + * @filp: The active open file structure for the debugfs "file"
  7725. + * @ubuf: The user buffer that will contain the samples read
  7726. + * @cnt: The maximum bytes to read from the debugfs "file"
  7727. + * @ppos: The current position in the debugfs "file"
  7728. + *
  7729. + * This function handles reading from the "sample" file within the hardware
  7730. + * latency detector debugfs directory interface. This file is used to read
  7731. + * raw samples from the global ring_buffer and allows the user to see a
  7732. + * running latency history. By default this will block pending a new
  7733. + * value written into the sample buffer, unless there are already
  7734. + * values waiting in the buffer, or the sample file was
  7735. + * previously opened in a non-blocking mode of operation.
  7736. + */
  7737. +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
  7738. + size_t cnt, loff_t *ppos)
  7739. +{
  7740. + int len = 0;
  7741. + char buf[64];
  7742. + struct sample *sample = NULL;
  7743. +
  7744. + if (!enabled)
  7745. + return 0;
  7746. +
  7747. + sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
  7748. + if (!sample)
  7749. + return -ENOMEM;
  7750. +
  7751. + while (!buffer_get_sample(sample)) {
  7752. +
  7753. + DEFINE_WAIT(wait);
  7754. +
  7755. + if (filp->f_flags & O_NONBLOCK) {
  7756. + len = -EAGAIN;
  7757. + goto out;
  7758. + }
  7759. +
  7760. + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
  7761. + schedule();
  7762. + finish_wait(&data.wq, &wait);
  7763. +
  7764. + if (signal_pending(current)) {
  7765. + len = -EINTR;
  7766. + goto out;
  7767. + }
  7768. +
  7769. + if (!enabled) { /* enable was toggled */
  7770. + len = 0;
  7771. + goto out;
  7772. + }
  7773. + }
  7774. +
  7775. + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
  7776. + sample->timestamp.tv_sec,
  7777. + sample->timestamp.tv_nsec,
  7778. + sample->duration,
  7779. + sample->outer_duration);
  7780. +
  7781. +
  7782. + /* handling partial reads is more trouble than it's worth */
  7783. + if (len > cnt)
  7784. + goto out;
  7785. +
  7786. + if (copy_to_user(ubuf, buf, len))
  7787. + len = -EFAULT;
  7788. +
  7789. +out:
  7790. + kfree(sample);
  7791. + return len;
  7792. +}
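Putting the read loop above to use, a blocking reader might look like the following sketch (the path and buffer size are assumptions); each successful read() returns one formatted sample line, and the loop ends once the detector is disabled or the read is interrupted.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char line[128];
    ssize_t n;
    /* Blocking open: debug_sample_fread() sleeps until a sample arrives. */
    int fd = open("/sys/kernel/debug/hwlat_detector/sample", O_RDONLY);

    if (fd < 0) {
        perror("open sample");
        return 1;
    }

    /* Each read yields "seconds.nanoseconds\tduration\touter_duration\n". */
    while ((n = read(fd, line, sizeof(line) - 1)) > 0) {
        line[n] = '\0';
        fputs(line, stdout);
    }

    close(fd);   /* releases the single-open "lock" via debug_sample_release() */
    return 0;
}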
  7793. +
  7794. +/**
  7795. + * debug_sample_release - Release function for "sample" debugfs interface
  7796. + * @inode: The in-kernel inode representation of the debugfs "file"
  7797. + * @filp: The active open file structure for the debugfs "file"
  7798. + *
  7799. + * This function completes the close of the debugfs interface "sample" file.
  7800. + * Frees the sample_open "lock" so that other users may open the interface.
  7801. + */
  7802. +static int debug_sample_release(struct inode *inode, struct file *filp)
  7803. +{
  7804. + atomic_dec(&data.sample_open);
  7805. +
  7806. + return 0;
  7807. +}
  7808. +
  7809. +/**
  7810. + * debug_threshold_fopen - Open function for "threshold" debugfs entry
  7811. + * @inode: The in-kernel inode representation of the debugfs "file"
  7812. + * @filp: The active open file structure for the debugfs "file"
  7813. + *
  7814. + * This function provides an open implementation for the "threshold" debugfs
  7815. + * interface to the hardware latency detector.
  7816. + */
  7817. +static int debug_threshold_fopen(struct inode *inode, struct file *filp)
  7818. +{
  7819. + return 0;
  7820. +}
  7821. +
  7822. +/**
  7823. + * debug_threshold_fread - Read function for "threshold" debugfs entry
  7824. + * @filp: The active open file structure for the debugfs "file"
  7825. + * @ubuf: The userspace provided buffer to read value into
  7826. + * @cnt: The maximum number of bytes to read
  7827. + * @ppos: The current "file" position
  7828. + *
  7829. + * This function provides a read implementation for the "threshold" debugfs
  7830. + * interface to the hardware latency detector. It can be used to determine
  7831. + * the current threshold level at which a latency will be recorded in the
  7832. + * global ring buffer, typically on the order of 10us.
  7833. + */
  7834. +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
  7835. + size_t cnt, loff_t *ppos)
  7836. +{
  7837. + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
  7838. +}
  7839. +
  7840. +/**
  7841. + * debug_threshold_fwrite - Write function for "threshold" debugfs entry
  7842. + * @filp: The active open file structure for the debugfs "file"
  7843. + * @ubuf: The user buffer that contains the value to write
  7844. + * @cnt: The maximum number of bytes to write to "file"
  7845. + * @ppos: The current position in the debugfs "file"
  7846. + *
  7847. + * This function provides a write implementation for the "threshold" debugfs
  7848. + * interface to the hardware latency detector. It can be used to configure
  7849. + * the threshold level at which any subsequently detected latencies will
  7850. + * be recorded into the global ring buffer.
  7851. + */
  7852. +static ssize_t debug_threshold_fwrite(struct file *filp,
  7853. + const char __user *ubuf,
  7854. + size_t cnt,
  7855. + loff_t *ppos)
  7856. +{
  7857. + int ret;
  7858. +
  7859. + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);
  7860. +
  7861. + if (enabled)
  7862. + wake_up_process(kthread);
  7863. +
  7864. + return ret;
  7865. +}
  7866. +
  7867. +/**
  7868. + * debug_width_fopen - Open function for "width" debugfs entry
  7869. + * @inode: The in-kernel inode representation of the debugfs "file"
  7870. + * @filp: The active open file structure for the debugfs "file"
  7871. + *
  7872. + * This function provides an open implementation for the "width" debugfs
  7873. + * interface to the hardware latency detector.
  7874. + */
  7875. +static int debug_width_fopen(struct inode *inode, struct file *filp)
  7876. +{
  7877. + return 0;
  7878. +}
  7879. +
  7880. +/**
  7881. + * debug_width_fread - Read function for "width" debugfs entry
  7882. + * @filp: The active open file structure for the debugfs "file"
  7883. + * @ubuf: The userspace provided buffer to read value into
  7884. + * @cnt: The maximum number of bytes to read
  7885. + * @ppos: The current "file" position
  7886. + *
  7887. + * This function provides a read implementation for the "width" debugfs
  7888. + * interface to the hardware latency detector. It can be used to determine
  7889. + * for how many us of the total window we will actively sample for any
  7890. + * hardware-induced latency periods. Obviously, it is not possible to
  7891. + * sample constantly and still have the system respond to a sample reader
  7892. + * or, worse, avoid making the system appear to have gone out to lunch.
  7893. + */
  7894. +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
  7895. + size_t cnt, loff_t *ppos)
  7896. +{
  7897. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
  7898. +}
  7899. +
  7900. +/**
  7901. + * debug_width_fwrite - Write function for "width" debugfs entry
  7902. + * @filp: The active open file structure for the debugfs "file"
  7903. + * @ubuf: The user buffer that contains the value to write
  7904. + * @cnt: The maximum number of bytes to write to "file"
  7905. + * @ppos: The current position in the debugfs "file"
  7906. + *
  7907. + * This function provides a write implementation for the "width" debugfs
  7908. + * interface to the hardware latency detector. It can be used to configure
  7909. + * for how many us of the total window we will actively sample for any
  7910. + * hardware-induced latency periods. Obviously, it is not possible to
  7911. + * sample constantly and still have the system respond to a sample reader
  7912. + * or, worse, avoid making the system appear to have gone out to lunch. It
  7913. + * is enforced that the width is less than the total window size.
  7914. + */
  7915. +static ssize_t debug_width_fwrite(struct file *filp,
  7916. + const char __user *ubuf,
  7917. + size_t cnt,
  7918. + loff_t *ppos)
  7919. +{
  7920. + char buf[U64STR_SIZE];
  7921. + int csize = min(cnt, sizeof(buf));
  7922. + u64 val = 0;
  7923. + int err = 0;
  7924. +
  7925. + memset(buf, '\0', sizeof(buf));
  7926. + if (copy_from_user(buf, ubuf, csize))
  7927. + return -EFAULT;
  7928. +
  7929. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  7930. + err = kstrtoull(buf, 10, &val);
  7931. + if (0 != err)
  7932. + return -EINVAL;
  7933. +
  7934. + mutex_lock(&data.lock);
  7935. + if (val < data.sample_window)
  7936. + data.sample_width = val;
  7937. + else {
  7938. + mutex_unlock(&data.lock);
  7939. + return -EINVAL;
  7940. + }
  7941. + mutex_unlock(&data.lock);
  7942. +
  7943. + if (enabled)
  7944. + wake_up_process(kthread);
  7945. +
  7946. + return csize;
  7947. +}
  7948. +
  7949. +/**
  7950. + * debug_window_fopen - Open function for "window" debugfs entry
  7951. + * @inode: The in-kernel inode representation of the debugfs "file"
  7952. + * @filp: The active open file structure for the debugfs "file"
  7953. + *
  7954. + * This function provides an open implementation for the "window" debugfs
  7955. + * interface to the hardware latency detector. The window is the total time
  7956. + * in us that will be considered one sample period. Conceptually, windows
  7957. + * occur back-to-back and contain a sample width period during which
  7958. + * actual sampling occurs.
  7959. + */
  7960. +static int debug_window_fopen(struct inode *inode, struct file *filp)
  7961. +{
  7962. + return 0;
  7963. +}
  7964. +
  7965. +/**
  7966. + * debug_window_fread - Read function for "window" debugfs entry
  7967. + * @filp: The active open file structure for the debugfs "file"
  7968. + * @ubuf: The userspace provided buffer to read value into
  7969. + * @cnt: The maximum number of bytes to read
  7970. + * @ppos: The current "file" position
  7971. + *
  7972. + * This function provides a read implementation for the "window" debugfs
  7973. + * interface to the hardware latency detector. The window is the total time
  7974. + * in us that will be considered one sample period. Conceptually, windows
  7975. + * occur back-to-back and contain a sample width period during which
  7976. + * actual sampling occurs. Can be used to read the total window size.
  7977. + */
  7978. +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
  7979. + size_t cnt, loff_t *ppos)
  7980. +{
  7981. + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
  7982. +}
  7983. +
  7984. +/**
  7985. + * debug_window_fwrite - Write function for "window" debugfs entry
  7986. + * @filp: The active open file structure for the debugfs "file"
  7987. + * @ubuf: The user buffer that contains the value to write
  7988. + * @cnt: The maximum number of bytes to write to "file"
  7989. + * @ppos: The current position in the debugfs "file"
  7990. + *
  7991. + * This function provides a write implementation for the "window" debugfs
  7992. + * interface to the hardware latency detector. The window is the total time
  7993. + * in us that will be considered one sample period. Conceptually, windows
  7994. + * occur back-to-back and contain a sample width period during which
  7995. + * actual sampling occurs. Can be used to write a new total window size. It
  7996. + * is enforced that any value written must be greater than the sample width
  7997. + * size, or an error results.
  7998. + */
  7999. +static ssize_t debug_window_fwrite(struct file *filp,
  8000. + const char __user *ubuf,
  8001. + size_t cnt,
  8002. + loff_t *ppos)
  8003. +{
  8004. + char buf[U64STR_SIZE];
  8005. + int csize = min(cnt, sizeof(buf));
  8006. + u64 val = 0;
  8007. + int err = 0;
  8008. +
  8009. + memset(buf, '\0', sizeof(buf));
  8010. + if (copy_from_user(buf, ubuf, csize))
  8011. + return -EFAULT;
  8012. +
  8013. + buf[U64STR_SIZE-1] = '\0'; /* just in case */
  8014. + err = kstrtoull(buf, 10, &val);
  8015. + if (0 != err)
  8016. + return -EINVAL;
  8017. +
  8018. + mutex_lock(&data.lock);
  8019. + if (data.sample_width < val)
  8020. + data.sample_window = val;
  8021. + else {
  8022. + mutex_unlock(&data.lock);
  8023. + return -EINVAL;
  8024. + }
  8025. + mutex_unlock(&data.lock);
  8026. +
  8027. + return csize;
  8028. +}
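Because debug_width_fwrite() rejects any width that is not strictly below the current window, and debug_window_fwrite() rejects any window that is not strictly above the current width, userspace has to order its writes accordingly: grow the window before the width, and shrink in the opposite order. A hedged sketch with hypothetical paths and values in microseconds, not part of the patch itself:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed debugfs mount point and directory name. */
#define HWLAT_DIR "/sys/kernel/debug/hwlat_detector/"

static int write_u64(const char *file, const char *val)
{
    char path[128];
    int fd;

    snprintf(path, sizeof(path), HWLAT_DIR "%s", file);
    fd = open(path, O_WRONLY);
    if (fd < 0)
        return -1;
    if (write(fd, val, strlen(val)) < 0) {
        close(fd);
        return -1;
    }
    return close(fd);
}

int main(void)
{
    /* Enlarge the window before the width so width < window always holds. */
    if (write_u64("window", "2000000"))    /* 2 s sampling period */
        perror("window");
    if (write_u64("width", "1000000"))     /* sample for 1 s of each window */
        perror("width");
    if (write_u64("threshold", "10"))      /* record latencies of 10 us or more */
        perror("threshold");
    return 0;
}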
  8029. +
  8030. +/*
  8031. + * Function pointers for the "count" debugfs file operations
  8032. + */
  8033. +static const struct file_operations count_fops = {
  8034. + .open = debug_count_fopen,
  8035. + .read = debug_count_fread,
  8036. + .write = debug_count_fwrite,
  8037. + .owner = THIS_MODULE,
  8038. +};
  8039. +
  8040. +/*
  8041. + * Function pointers for the "enable" debugfs file operations
  8042. + */
  8043. +static const struct file_operations enable_fops = {
  8044. + .open = debug_enable_fopen,
  8045. + .read = debug_enable_fread,
  8046. + .write = debug_enable_fwrite,
  8047. + .owner = THIS_MODULE,
  8048. +};
  8049. +
  8050. +/*
  8051. + * Function pointers for the "max" debugfs file operations
  8052. + */
  8053. +static const struct file_operations max_fops = {
  8054. + .open = debug_max_fopen,
  8055. + .read = debug_max_fread,
  8056. + .write = debug_max_fwrite,
  8057. + .owner = THIS_MODULE,
  8058. +};
  8059. +
  8060. +/*
  8061. + * Function pointers for the "sample" debugfs file operations
  8062. + */
  8063. +static const struct file_operations sample_fops = {
  8064. + .open = debug_sample_fopen,
  8065. + .read = debug_sample_fread,
  8066. + .release = debug_sample_release,
  8067. + .owner = THIS_MODULE,
  8068. +};
  8069. +
  8070. +/*
  8071. + * Function pointers for the "threshold" debugfs file operations
  8072. + */
  8073. +static const struct file_operations threshold_fops = {
  8074. + .open = debug_threshold_fopen,
  8075. + .read = debug_threshold_fread,
  8076. + .write = debug_threshold_fwrite,
  8077. + .owner = THIS_MODULE,
  8078. +};
  8079. +
  8080. +/*
  8081. + * Function pointers for the "width" debugfs file operations
  8082. + */
  8083. +static const struct file_operations width_fops = {
  8084. + .open = debug_width_fopen,
  8085. + .read = debug_width_fread,
  8086. + .write = debug_width_fwrite,
  8087. + .owner = THIS_MODULE,
  8088. +};
  8089. +
  8090. +/*
  8091. + * Function pointers for the "window" debugfs file operations
  8092. + */
  8093. +static const struct file_operations window_fops = {
  8094. + .open = debug_window_fopen,
  8095. + .read = debug_window_fread,
  8096. + .write = debug_window_fwrite,
  8097. + .owner = THIS_MODULE,
  8098. +};
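All of the single-value files above wrap the same open/read/write boilerplate around one u64. As an aside, this pattern is often expressed with the generic simple-attribute helpers instead of hand-rolled file_operations; the following is a minimal sketch of that style, not what this patch does, and every name in it is illustrative only.

#include <linux/debugfs.h>
#include <linux/module.h>

static u64 example_threshold = 10;      /* illustrative value, in usecs */

static int example_get(void *data, u64 *val)
{
        *val = *(u64 *)data;
        return 0;
}

static int example_set(void *data, u64 val)
{
        *(u64 *)data = val;
        return 0;
}
/* Generates example_fops; simple_attr_read/write handle the text parsing. */
DEFINE_SIMPLE_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");

static struct dentry *example_dir;

static int __init example_init(void)
{
        example_dir = debugfs_create_dir("hwlat_example", NULL);
        debugfs_create_file("threshold", 0644, example_dir,
                            &example_threshold, &example_fops);
        return 0;
}

static void __exit example_exit(void)
{
        debugfs_remove_recursive(example_dir);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");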
  8099. +
  8100. +/**
  8101. + * init_debugfs - A function to initialize the debugfs interface files
  8102. + *
  8103. + * This function creates entries in debugfs for "hwlat_detector", including
  8104. + * files to read values from the detector, current samples, and the
  8105. + * maximum sample that has been captured since the hardware latency
  8106. + * detector was started.
  8107. + */
  8108. +static int init_debugfs(void)
  8109. +{
  8110. + int ret = -ENOMEM;
  8111. +
  8112. + debug_dir = debugfs_create_dir(DRVNAME, NULL);
  8113. + if (!debug_dir)
  8114. + goto err_debug_dir;
  8115. +
  8116. + debug_sample = debugfs_create_file("sample", 0444,
  8117. + debug_dir, NULL,
  8118. + &sample_fops);
  8119. + if (!debug_sample)
  8120. + goto err_sample;
  8121. +
  8122. + debug_count = debugfs_create_file("count", 0444,
  8123. + debug_dir, NULL,
  8124. + &count_fops);
  8125. + if (!debug_count)
  8126. + goto err_count;
  8127. +
  8128. + debug_max = debugfs_create_file("max", 0444,
  8129. + debug_dir, NULL,
  8130. + &max_fops);
  8131. + if (!debug_max)
  8132. + goto err_max;
  8133. +
  8134. + debug_sample_window = debugfs_create_file("window", 0644,
  8135. + debug_dir, NULL,
  8136. + &window_fops);
  8137. + if (!debug_sample_window)
  8138. + goto err_window;
  8139. +
  8140. + debug_sample_width = debugfs_create_file("width", 0644,
  8141. + debug_dir, NULL,
  8142. + &width_fops);
  8143. + if (!debug_sample_width)
  8144. + goto err_width;
  8145. +
  8146. + debug_threshold = debugfs_create_file("threshold", 0644,
  8147. + debug_dir, NULL,
  8148. + &threshold_fops);
  8149. + if (!debug_threshold)
  8150. + goto err_threshold;
  8151. +
  8152. + debug_enable = debugfs_create_file("enable", 0644,
  8153. + debug_dir, &enabled,
  8154. + &enable_fops);
  8155. + if (!debug_enable)
  8156. + goto err_enable;
  8157. +
  8158. + else {
  8159. + ret = 0;
  8160. + goto out;
  8161. + }
  8162. +
  8163. +err_enable:
  8164. + debugfs_remove(debug_threshold);
  8165. +err_threshold:
  8166. + debugfs_remove(debug_sample_width);
  8167. +err_width:
  8168. + debugfs_remove(debug_sample_window);
  8169. +err_window:
  8170. + debugfs_remove(debug_max);
  8171. +err_max:
  8172. + debugfs_remove(debug_count);
  8173. +err_count:
  8174. + debugfs_remove(debug_sample);
  8175. +err_sample:
  8176. + debugfs_remove(debug_dir);
  8177. +err_debug_dir:
  8178. +out:
  8179. + return ret;
  8180. +}
  8181. +
  8182. +/**
  8183. + * free_debugfs - A function to cleanup the debugfs file interface
  8184. + */
  8185. +static void free_debugfs(void)
  8186. +{
  8187. + /* could also use a debugfs_remove_recursive */
  8188. + debugfs_remove(debug_enable);
  8189. + debugfs_remove(debug_threshold);
  8190. + debugfs_remove(debug_sample_width);
  8191. + debugfs_remove(debug_sample_window);
  8192. + debugfs_remove(debug_max);
  8193. + debugfs_remove(debug_count);
  8194. + debugfs_remove(debug_sample);
  8195. + debugfs_remove(debug_dir);
  8196. +}
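As the comment inside free_debugfs() notes, the same teardown could be collapsed into a single recursive removal of the directory; a sketch of that alternative (again, not what the patch actually does) would be:

static void free_debugfs(void)
{
        /* Removes debug_dir and every file created underneath it. */
        debugfs_remove_recursive(debug_dir);
        debug_dir = NULL;
}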
  8197. +
  8198. +/**
  8199. + * detector_init - Standard module initialization code
  8200. + */
  8201. +static int detector_init(void)
  8202. +{
  8203. + int ret = -ENOMEM;
  8204. +
  8205. + pr_info(BANNER "version %s\n", VERSION);
  8206. +
  8207. + ret = init_stats();
  8208. + if (0 != ret)
  8209. + goto out;
  8210. +
  8211. + ret = init_debugfs();
  8212. + if (0 != ret)
  8213. + goto err_stats;
  8214. +
  8215. + if (enabled)
  8216. + ret = start_kthread();
  8217. +
  8218. + goto out;
  8219. +
  8220. +err_stats:
  8221. + ring_buffer_free(ring_buffer);
  8222. +out:
  8223. + return ret;
  8224. +
  8225. +}
  8226. +
  8227. +/**
  8228. + * detector_exit - Standard module cleanup code
  8229. + */
  8230. +static void detector_exit(void)
  8231. +{
  8232. + int err;
  8233. +
  8234. + if (enabled) {
  8235. + enabled = 0;
  8236. + err = stop_kthread();
  8237. + if (err)
  8238. + pr_err(BANNER "cannot stop kthread\n");
  8239. + }
  8240. +
  8241. + free_debugfs();
  8242. + ring_buffer_free(ring_buffer); /* free up the ring buffer */
  8243. +
  8244. +}
  8245. +
  8246. +module_init(detector_init);
  8247. +module_exit(detector_exit);
  8248. diff -Nur linux-4.1.39.orig/drivers/misc/Kconfig linux-4.1.39/drivers/misc/Kconfig
  8249. --- linux-4.1.39.orig/drivers/misc/Kconfig 2017-03-13 21:04:36.000000000 +0100
  8250. +++ linux-4.1.39/drivers/misc/Kconfig 2017-04-18 17:56:30.585396045 +0200
  8251. @@ -54,6 +54,7 @@
  8252. config ATMEL_TCLIB
  8253. bool "Atmel AT32/AT91 Timer/Counter Library"
  8254. depends on (AVR32 || ARCH_AT91)
  8255. + default y if PREEMPT_RT_FULL
  8256. help
  8257. Select this if you want a library to allocate the Timer/Counter
  8258. blocks found on many Atmel processors. This facilitates using
  8259. @@ -69,8 +70,7 @@
  8260. are combined to make a single 32-bit timer.
  8261. When GENERIC_CLOCKEVENTS is defined, the third timer channel
  8262. - may be used as a clock event device supporting oneshot mode
  8263. - (delays of up to two seconds) based on the 32 KiHz clock.
  8264. + may be used as a clock event device supporting oneshot mode.
  8265. config ATMEL_TCB_CLKSRC_BLOCK
  8266. int
  8267. @@ -84,6 +84,15 @@
  8268. TC can be used for other purposes, such as PWM generation and
  8269. interval timing.
  8270. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
  8271. + bool "TC Block use 32 KiHz clock"
  8272. + depends on ATMEL_TCB_CLKSRC
  8273. + default y if !PREEMPT_RT_FULL
  8274. + help
  8275. + Select this to use 32 KiHz base clock rate as TC block clock
  8276. + source for clock events.
  8277. +
  8278. +
  8279. config DUMMY_IRQ
  8280. tristate "Dummy IRQ handler"
  8281. default n
  8282. @@ -113,6 +122,35 @@
  8283. for information on the specific driver level and support statement
  8284. for your IBM server.
  8285. +config HWLAT_DETECTOR
  8286. + tristate "Testing module to detect hardware-induced latencies"
  8287. + depends on DEBUG_FS
  8288. + depends on RING_BUFFER
  8289. + default m
  8290. + ---help---
  8291. + A simple hardware latency detector. Use this module to detect
  8292. + large latencies introduced by the behavior of the underlying
  8293. + system firmware external to Linux. We do this using periodic
  8294. + use of stop_machine to grab all available CPUs and measure
  8295. + for unexplainable gaps in the CPU timestamp counter(s). By
  8296. + default, the module is not enabled until the "enable" file
  8297. + within the "hwlat_detector" debugfs directory is toggled.
  8298. +
  8299. + This module is often used to detect SMI (System Management
  8300. + Interrupts) on x86 systems, though it is not x86 specific. To
  8301. + this end, we default to using a sample window of 1 second,
  8302. + during which we will sample for 0.5 seconds. If an SMI or
  8303. + similar event occurs during that time, it is recorded
  8304. + into an 8K samples global ring buffer until retrieved.
  8305. +
  8306. + WARNING: This software should never be enabled (it can be built
  8307. + but should not be turned on after it is loaded) in a production
  8308. + environment where high latencies are a concern since the
  8309. + sampling mechanism actually introduces latencies for
  8310. + regular tasks while the CPU(s) are being held.
  8311. +
  8312. + If unsure, say N.
  8313. +
  8314. config PHANTOM
  8315. tristate "Sensable PHANToM (PCI)"
  8316. depends on PCI
  8317. diff -Nur linux-4.1.39.orig/drivers/misc/Makefile linux-4.1.39/drivers/misc/Makefile
  8318. --- linux-4.1.39.orig/drivers/misc/Makefile 2017-03-13 21:04:36.000000000 +0100
  8319. +++ linux-4.1.39/drivers/misc/Makefile 2017-04-18 17:56:30.585396045 +0200
  8320. @@ -38,6 +38,7 @@
  8321. obj-$(CONFIG_HMC6352) += hmc6352.o
  8322. obj-y += eeprom/
  8323. obj-y += cb710/
  8324. +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o
  8325. obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o
  8326. obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
  8327. obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o
  8328. diff -Nur linux-4.1.39.orig/drivers/mmc/host/mmci.c linux-4.1.39/drivers/mmc/host/mmci.c
  8329. --- linux-4.1.39.orig/drivers/mmc/host/mmci.c 2017-03-13 21:04:36.000000000 +0100
  8330. +++ linux-4.1.39/drivers/mmc/host/mmci.c 2017-04-18 17:56:30.589396200 +0200
  8331. @@ -1155,15 +1155,12 @@
  8332. struct sg_mapping_iter *sg_miter = &host->sg_miter;
  8333. struct variant_data *variant = host->variant;
  8334. void __iomem *base = host->base;
  8335. - unsigned long flags;
  8336. u32 status;
  8337. status = readl(base + MMCISTATUS);
  8338. dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
  8339. - local_irq_save(flags);
  8340. -
  8341. do {
  8342. unsigned int remain, len;
  8343. char *buffer;
  8344. @@ -1203,8 +1200,6 @@
  8345. sg_miter_stop(sg_miter);
  8346. - local_irq_restore(flags);
  8347. -
  8348. /*
  8349. * If we have less than the fifo 'half-full' threshold to transfer,
  8350. * trigger a PIO interrupt as soon as any data is available.
  8351. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/3com/3c59x.c linux-4.1.39/drivers/net/ethernet/3com/3c59x.c
  8352. --- linux-4.1.39.orig/drivers/net/ethernet/3com/3c59x.c 2017-03-13 21:04:36.000000000 +0100
  8353. +++ linux-4.1.39/drivers/net/ethernet/3com/3c59x.c 2017-04-18 17:56:30.589396200 +0200
  8354. @@ -842,9 +842,9 @@
  8355. {
  8356. struct vortex_private *vp = netdev_priv(dev);
  8357. unsigned long flags;
  8358. - local_irq_save(flags);
  8359. + local_irq_save_nort(flags);
  8360. (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
  8361. - local_irq_restore(flags);
  8362. + local_irq_restore_nort(flags);
  8363. }
  8364. #endif
  8365. @@ -1916,12 +1916,12 @@
  8366. * Block interrupts because vortex_interrupt does a bare spin_lock()
  8367. */
  8368. unsigned long flags;
  8369. - local_irq_save(flags);
  8370. + local_irq_save_nort(flags);
  8371. if (vp->full_bus_master_tx)
  8372. boomerang_interrupt(dev->irq, dev);
  8373. else
  8374. vortex_interrupt(dev->irq, dev);
  8375. - local_irq_restore(flags);
  8376. + local_irq_restore_nort(flags);
  8377. }
  8378. }
  8379. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c linux-4.1.39/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
  8380. --- linux-4.1.39.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2017-03-13 21:04:36.000000000 +0100
  8381. +++ linux-4.1.39/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 2017-04-18 17:56:30.589396200 +0200
  8382. @@ -2212,11 +2212,7 @@
  8383. }
  8384. tpd_req = atl1c_cal_tpd_req(skb);
  8385. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
  8386. - if (netif_msg_pktdata(adapter))
  8387. - dev_info(&adapter->pdev->dev, "tx locked\n");
  8388. - return NETDEV_TX_LOCKED;
  8389. - }
  8390. + spin_lock_irqsave(&adapter->tx_lock, flags);
  8391. if (atl1c_tpd_avail(adapter, type) < tpd_req) {
  8392. /* no enough descriptor, just stop queue */
  8393. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c linux-4.1.39/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
  8394. --- linux-4.1.39.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2017-03-13 21:04:36.000000000 +0100
  8395. +++ linux-4.1.39/drivers/net/ethernet/atheros/atl1e/atl1e_main.c 2017-04-18 17:56:30.589396200 +0200
  8396. @@ -1880,8 +1880,7 @@
  8397. return NETDEV_TX_OK;
  8398. }
  8399. tpd_req = atl1e_cal_tdp_req(skb);
  8400. - if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
  8401. - return NETDEV_TX_LOCKED;
  8402. + spin_lock_irqsave(&adapter->tx_lock, flags);
  8403. if (atl1e_tpd_avail(adapter) < tpd_req) {
  8404. /* no enough descriptor, just stop queue */
  8405. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/chelsio/cxgb/sge.c linux-4.1.39/drivers/net/ethernet/chelsio/cxgb/sge.c
  8406. --- linux-4.1.39.orig/drivers/net/ethernet/chelsio/cxgb/sge.c 2017-03-13 21:04:36.000000000 +0100
  8407. +++ linux-4.1.39/drivers/net/ethernet/chelsio/cxgb/sge.c 2017-04-18 17:56:30.589396200 +0200
  8408. @@ -1664,8 +1664,7 @@
  8409. struct cmdQ *q = &sge->cmdQ[qid];
  8410. unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
  8411. - if (!spin_trylock(&q->lock))
  8412. - return NETDEV_TX_LOCKED;
  8413. + spin_lock(&q->lock);
  8414. reclaim_completed_tx(sge, q);
  8415. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/freescale/gianfar.c linux-4.1.39/drivers/net/ethernet/freescale/gianfar.c
  8416. --- linux-4.1.39.orig/drivers/net/ethernet/freescale/gianfar.c 2017-03-13 21:04:36.000000000 +0100
  8417. +++ linux-4.1.39/drivers/net/ethernet/freescale/gianfar.c 2017-04-18 17:56:30.589396200 +0200
  8418. @@ -1540,7 +1540,7 @@
  8419. if (netif_running(ndev)) {
  8420. - local_irq_save(flags);
  8421. + local_irq_save_nort(flags);
  8422. lock_tx_qs(priv);
  8423. gfar_halt_nodisable(priv);
  8424. @@ -1556,7 +1556,7 @@
  8425. gfar_write(&regs->maccfg1, tempval);
  8426. unlock_tx_qs(priv);
  8427. - local_irq_restore(flags);
  8428. + local_irq_restore_nort(flags);
  8429. disable_napi(priv);
  8430. @@ -1598,7 +1598,7 @@
  8431. /* Disable Magic Packet mode, in case something
  8432. * else woke us up.
  8433. */
  8434. - local_irq_save(flags);
  8435. + local_irq_save_nort(flags);
  8436. lock_tx_qs(priv);
  8437. tempval = gfar_read(&regs->maccfg2);
  8438. @@ -1608,7 +1608,7 @@
  8439. gfar_start(priv);
  8440. unlock_tx_qs(priv);
  8441. - local_irq_restore(flags);
  8442. + local_irq_restore_nort(flags);
  8443. netif_device_attach(ndev);
  8444. @@ -3418,14 +3418,14 @@
  8445. dev->stats.tx_dropped++;
  8446. atomic64_inc(&priv->extra_stats.tx_underrun);
  8447. - local_irq_save(flags);
  8448. + local_irq_save_nort(flags);
  8449. lock_tx_qs(priv);
  8450. /* Reactivate the Tx Queues */
  8451. gfar_write(&regs->tstat, gfargrp->tstat);
  8452. unlock_tx_qs(priv);
  8453. - local_irq_restore(flags);
  8454. + local_irq_restore_nort(flags);
  8455. }
  8456. netif_dbg(priv, tx_err, dev, "Transmit Error\n");
  8457. }
  8458. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/neterion/s2io.c linux-4.1.39/drivers/net/ethernet/neterion/s2io.c
  8459. --- linux-4.1.39.orig/drivers/net/ethernet/neterion/s2io.c 2017-03-13 21:04:36.000000000 +0100
  8460. +++ linux-4.1.39/drivers/net/ethernet/neterion/s2io.c 2017-04-18 17:56:30.589396200 +0200
  8461. @@ -4084,12 +4084,7 @@
  8462. [skb->priority & (MAX_TX_FIFOS - 1)];
  8463. fifo = &mac_control->fifos[queue];
  8464. - if (do_spin_lock)
  8465. - spin_lock_irqsave(&fifo->tx_lock, flags);
  8466. - else {
  8467. - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
  8468. - return NETDEV_TX_LOCKED;
  8469. - }
  8470. + spin_lock_irqsave(&fifo->tx_lock, flags);
  8471. if (sp->config.multiq) {
  8472. if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
  8473. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c linux-4.1.39/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
  8474. --- linux-4.1.39.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2017-03-13 21:04:36.000000000 +0100
  8475. +++ linux-4.1.39/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c 2017-04-18 17:56:30.589396200 +0200
  8476. @@ -2137,10 +2137,8 @@
  8477. struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
  8478. unsigned long flags;
  8479. - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
  8480. - /* Collision - tell upper layer to requeue */
  8481. - return NETDEV_TX_LOCKED;
  8482. - }
  8483. + spin_lock_irqsave(&tx_ring->tx_lock, flags);
  8484. +
  8485. if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
  8486. netif_stop_queue(netdev);
  8487. spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
  8488. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/realtek/8139too.c linux-4.1.39/drivers/net/ethernet/realtek/8139too.c
  8489. --- linux-4.1.39.orig/drivers/net/ethernet/realtek/8139too.c 2017-03-13 21:04:36.000000000 +0100
  8490. +++ linux-4.1.39/drivers/net/ethernet/realtek/8139too.c 2017-04-18 17:56:30.593396355 +0200
  8491. @@ -2229,7 +2229,7 @@
  8492. struct rtl8139_private *tp = netdev_priv(dev);
  8493. const int irq = tp->pci_dev->irq;
  8494. - disable_irq(irq);
  8495. + disable_irq_nosync(irq);
  8496. rtl8139_interrupt(irq, dev);
  8497. enable_irq(irq);
  8498. }
  8499. diff -Nur linux-4.1.39.orig/drivers/net/ethernet/tehuti/tehuti.c linux-4.1.39/drivers/net/ethernet/tehuti/tehuti.c
  8500. --- linux-4.1.39.orig/drivers/net/ethernet/tehuti/tehuti.c 2017-03-13 21:04:36.000000000 +0100
  8501. +++ linux-4.1.39/drivers/net/ethernet/tehuti/tehuti.c 2017-04-18 17:56:30.593396355 +0200
  8502. @@ -1629,13 +1629,8 @@
  8503. unsigned long flags;
  8504. ENTER;
  8505. - local_irq_save(flags);
  8506. - if (!spin_trylock(&priv->tx_lock)) {
  8507. - local_irq_restore(flags);
  8508. - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
  8509. - BDX_DRV_NAME, ndev->name);
  8510. - return NETDEV_TX_LOCKED;
  8511. - }
  8512. +
  8513. + spin_lock_irqsave(&priv->tx_lock, flags);
  8514. /* build tx descriptor */
  8515. BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
  8516. diff -Nur linux-4.1.39.orig/drivers/net/rionet.c linux-4.1.39/drivers/net/rionet.c
  8517. --- linux-4.1.39.orig/drivers/net/rionet.c 2017-03-13 21:04:36.000000000 +0100
  8518. +++ linux-4.1.39/drivers/net/rionet.c 2017-04-18 17:56:30.593396355 +0200
  8519. @@ -174,11 +174,7 @@
  8520. unsigned long flags;
  8521. int add_num = 1;
  8522. - local_irq_save(flags);
  8523. - if (!spin_trylock(&rnet->tx_lock)) {
  8524. - local_irq_restore(flags);
  8525. - return NETDEV_TX_LOCKED;
  8526. - }
  8527. + spin_lock_irqsave(&rnet->tx_lock, flags);
  8528. if (is_multicast_ether_addr(eth->h_dest))
  8529. add_num = nets[rnet->mport->id].nact;
  8530. diff -Nur linux-4.1.39.orig/drivers/net/wireless/orinoco/orinoco_usb.c linux-4.1.39/drivers/net/wireless/orinoco/orinoco_usb.c
  8531. --- linux-4.1.39.orig/drivers/net/wireless/orinoco/orinoco_usb.c 2017-03-13 21:04:36.000000000 +0100
  8532. +++ linux-4.1.39/drivers/net/wireless/orinoco/orinoco_usb.c 2017-04-18 17:56:30.593396355 +0200
  8533. @@ -697,7 +697,7 @@
  8534. while (!ctx->done.done && msecs--)
  8535. udelay(1000);
  8536. } else {
  8537. - wait_event_interruptible(ctx->done.wait,
  8538. + swait_event_interruptible(ctx->done.wait,
  8539. ctx->done.done);
  8540. }
  8541. break;
  8542. diff -Nur linux-4.1.39.orig/drivers/pci/access.c linux-4.1.39/drivers/pci/access.c
  8543. --- linux-4.1.39.orig/drivers/pci/access.c 2017-03-13 21:04:36.000000000 +0100
  8544. +++ linux-4.1.39/drivers/pci/access.c 2017-04-18 17:56:30.593396355 +0200
  8545. @@ -561,7 +561,7 @@
  8546. WARN_ON(!dev->block_cfg_access);
  8547. dev->block_cfg_access = 0;
  8548. - wake_up_all(&pci_cfg_wait);
  8549. + wake_up_all_locked(&pci_cfg_wait);
  8550. raw_spin_unlock_irqrestore(&pci_lock, flags);
  8551. }
  8552. EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
  8553. diff -Nur linux-4.1.39.orig/drivers/pinctrl/qcom/pinctrl-msm.c linux-4.1.39/drivers/pinctrl/qcom/pinctrl-msm.c
  8554. --- linux-4.1.39.orig/drivers/pinctrl/qcom/pinctrl-msm.c 2017-03-13 21:04:36.000000000 +0100
  8555. +++ linux-4.1.39/drivers/pinctrl/qcom/pinctrl-msm.c 2017-04-18 17:56:30.593396355 +0200
  8556. @@ -59,7 +59,7 @@
  8557. struct notifier_block restart_nb;
  8558. int irq;
  8559. - spinlock_t lock;
  8560. + raw_spinlock_t lock;
  8561. DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO);
  8562. DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO);
  8563. @@ -155,14 +155,14 @@
  8564. if (WARN_ON(i == g->nfuncs))
  8565. return -EINVAL;
  8566. - spin_lock_irqsave(&pctrl->lock, flags);
  8567. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8568. val = readl(pctrl->regs + g->ctl_reg);
  8569. val &= ~(0x7 << g->mux_bit);
  8570. val |= i << g->mux_bit;
  8571. writel(val, pctrl->regs + g->ctl_reg);
  8572. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8573. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8574. return 0;
  8575. }
  8576. @@ -325,14 +325,14 @@
  8577. break;
  8578. case PIN_CONFIG_OUTPUT:
  8579. /* set output value */
  8580. - spin_lock_irqsave(&pctrl->lock, flags);
  8581. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8582. val = readl(pctrl->regs + g->io_reg);
  8583. if (arg)
  8584. val |= BIT(g->out_bit);
  8585. else
  8586. val &= ~BIT(g->out_bit);
  8587. writel(val, pctrl->regs + g->io_reg);
  8588. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8589. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8590. /* enable output */
  8591. arg = 1;
  8592. @@ -353,12 +353,12 @@
  8593. return -EINVAL;
  8594. }
  8595. - spin_lock_irqsave(&pctrl->lock, flags);
  8596. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8597. val = readl(pctrl->regs + g->ctl_reg);
  8598. val &= ~(mask << bit);
  8599. val |= arg << bit;
  8600. writel(val, pctrl->regs + g->ctl_reg);
  8601. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8602. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8603. }
  8604. return 0;
  8605. @@ -386,13 +386,13 @@
  8606. g = &pctrl->soc->groups[offset];
  8607. - spin_lock_irqsave(&pctrl->lock, flags);
  8608. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8609. val = readl(pctrl->regs + g->ctl_reg);
  8610. val &= ~BIT(g->oe_bit);
  8611. writel(val, pctrl->regs + g->ctl_reg);
  8612. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8613. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8614. return 0;
  8615. }
  8616. @@ -406,7 +406,7 @@
  8617. g = &pctrl->soc->groups[offset];
  8618. - spin_lock_irqsave(&pctrl->lock, flags);
  8619. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8620. val = readl(pctrl->regs + g->io_reg);
  8621. if (value)
  8622. @@ -419,7 +419,7 @@
  8623. val |= BIT(g->oe_bit);
  8624. writel(val, pctrl->regs + g->ctl_reg);
  8625. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8626. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8627. return 0;
  8628. }
  8629. @@ -445,7 +445,7 @@
  8630. g = &pctrl->soc->groups[offset];
  8631. - spin_lock_irqsave(&pctrl->lock, flags);
  8632. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8633. val = readl(pctrl->regs + g->io_reg);
  8634. if (value)
  8635. @@ -454,7 +454,7 @@
  8636. val &= ~BIT(g->out_bit);
  8637. writel(val, pctrl->regs + g->io_reg);
  8638. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8639. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8640. }
  8641. static int msm_gpio_request(struct gpio_chip *chip, unsigned offset)
  8642. @@ -585,7 +585,7 @@
  8643. g = &pctrl->soc->groups[d->hwirq];
  8644. - spin_lock_irqsave(&pctrl->lock, flags);
  8645. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8646. val = readl(pctrl->regs + g->intr_cfg_reg);
  8647. val &= ~BIT(g->intr_enable_bit);
  8648. @@ -593,7 +593,7 @@
  8649. clear_bit(d->hwirq, pctrl->enabled_irqs);
  8650. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8651. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8652. }
  8653. static void msm_gpio_irq_unmask(struct irq_data *d)
  8654. @@ -606,7 +606,7 @@
  8655. g = &pctrl->soc->groups[d->hwirq];
  8656. - spin_lock_irqsave(&pctrl->lock, flags);
  8657. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8658. val = readl(pctrl->regs + g->intr_status_reg);
  8659. val &= ~BIT(g->intr_status_bit);
  8660. @@ -618,7 +618,7 @@
  8661. set_bit(d->hwirq, pctrl->enabled_irqs);
  8662. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8663. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8664. }
  8665. static void msm_gpio_irq_ack(struct irq_data *d)
  8666. @@ -631,7 +631,7 @@
  8667. g = &pctrl->soc->groups[d->hwirq];
  8668. - spin_lock_irqsave(&pctrl->lock, flags);
  8669. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8670. val = readl(pctrl->regs + g->intr_status_reg);
  8671. if (g->intr_ack_high)
  8672. @@ -643,7 +643,7 @@
  8673. if (test_bit(d->hwirq, pctrl->dual_edge_irqs))
  8674. msm_gpio_update_dual_edge_pos(pctrl, g, d);
  8675. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8676. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8677. }
  8678. static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
  8679. @@ -656,7 +656,7 @@
  8680. g = &pctrl->soc->groups[d->hwirq];
  8681. - spin_lock_irqsave(&pctrl->lock, flags);
  8682. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8683. /*
  8684. * For hw without possibility of detecting both edges
  8685. @@ -730,7 +730,7 @@
  8686. if (test_bit(d->hwirq, pctrl->dual_edge_irqs))
  8687. msm_gpio_update_dual_edge_pos(pctrl, g, d);
  8688. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8689. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8690. if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
  8691. __irq_set_handler_locked(d->irq, handle_level_irq);
  8692. @@ -746,11 +746,11 @@
  8693. struct msm_pinctrl *pctrl = to_msm_pinctrl(gc);
  8694. unsigned long flags;
  8695. - spin_lock_irqsave(&pctrl->lock, flags);
  8696. + raw_spin_lock_irqsave(&pctrl->lock, flags);
  8697. irq_set_irq_wake(pctrl->irq, on);
  8698. - spin_unlock_irqrestore(&pctrl->lock, flags);
  8699. + raw_spin_unlock_irqrestore(&pctrl->lock, flags);
  8700. return 0;
  8701. }
  8702. @@ -887,7 +887,7 @@
  8703. pctrl->soc = soc_data;
  8704. pctrl->chip = msm_gpio_template;
  8705. - spin_lock_init(&pctrl->lock);
  8706. + raw_spin_lock_init(&pctrl->lock);
  8707. res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  8708. pctrl->regs = devm_ioremap_resource(&pdev->dev, res);
  8709. diff -Nur linux-4.1.39.orig/drivers/scsi/fcoe/fcoe.c linux-4.1.39/drivers/scsi/fcoe/fcoe.c
  8710. --- linux-4.1.39.orig/drivers/scsi/fcoe/fcoe.c 2017-03-13 21:04:36.000000000 +0100
  8711. +++ linux-4.1.39/drivers/scsi/fcoe/fcoe.c 2017-04-18 17:56:30.593396355 +0200
  8712. @@ -1287,7 +1287,7 @@
  8713. struct sk_buff *skb;
  8714. #ifdef CONFIG_SMP
  8715. struct fcoe_percpu_s *p0;
  8716. - unsigned targ_cpu = get_cpu();
  8717. + unsigned targ_cpu = get_cpu_light();
  8718. #endif /* CONFIG_SMP */
  8719. FCOE_DBG("Destroying receive thread for CPU %d\n", cpu);
  8720. @@ -1343,7 +1343,7 @@
  8721. kfree_skb(skb);
  8722. spin_unlock_bh(&p->fcoe_rx_list.lock);
  8723. }
  8724. - put_cpu();
  8725. + put_cpu_light();
  8726. #else
  8727. /*
  8728. * This a non-SMP scenario where the singular Rx thread is
  8729. @@ -1567,11 +1567,11 @@
  8730. static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
  8731. {
  8732. struct fcoe_percpu_s *fps;
  8733. - int rc;
  8734. + int rc, cpu = get_cpu_light();
  8735. - fps = &get_cpu_var(fcoe_percpu);
  8736. + fps = &per_cpu(fcoe_percpu, cpu);
  8737. rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
  8738. - put_cpu_var(fcoe_percpu);
  8739. + put_cpu_light();
  8740. return rc;
  8741. }
  8742. @@ -1767,11 +1767,11 @@
  8743. return 0;
  8744. }
  8745. - stats = per_cpu_ptr(lport->stats, get_cpu());
  8746. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  8747. stats->InvalidCRCCount++;
  8748. if (stats->InvalidCRCCount < 5)
  8749. printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
  8750. - put_cpu();
  8751. + put_cpu_light();
  8752. return -EINVAL;
  8753. }
  8754. @@ -1815,7 +1815,7 @@
  8755. */
  8756. hp = (struct fcoe_hdr *) skb_network_header(skb);
  8757. - stats = per_cpu_ptr(lport->stats, get_cpu());
  8758. + stats = per_cpu_ptr(lport->stats, get_cpu_light());
  8759. if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
  8760. if (stats->ErrorFrames < 5)
  8761. printk(KERN_WARNING "fcoe: FCoE version "
  8762. @@ -1847,13 +1847,13 @@
  8763. goto drop;
  8764. if (!fcoe_filter_frames(lport, fp)) {
  8765. - put_cpu();
  8766. + put_cpu_light();
  8767. fc_exch_recv(lport, fp);
  8768. return;
  8769. }
  8770. drop:
  8771. stats->ErrorFrames++;
  8772. - put_cpu();
  8773. + put_cpu_light();
  8774. kfree_skb(skb);
  8775. }
  8776. diff -Nur linux-4.1.39.orig/drivers/scsi/fcoe/fcoe_ctlr.c linux-4.1.39/drivers/scsi/fcoe/fcoe_ctlr.c
  8777. --- linux-4.1.39.orig/drivers/scsi/fcoe/fcoe_ctlr.c 2017-03-13 21:04:36.000000000 +0100
  8778. +++ linux-4.1.39/drivers/scsi/fcoe/fcoe_ctlr.c 2017-04-18 17:56:30.593396355 +0200
  8779. @@ -831,7 +831,7 @@
  8780. INIT_LIST_HEAD(&del_list);
  8781. - stats = per_cpu_ptr(fip->lp->stats, get_cpu());
  8782. + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
  8783. list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
  8784. deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
  8785. @@ -867,7 +867,7 @@
  8786. sel_time = fcf->time;
  8787. }
  8788. }
  8789. - put_cpu();
  8790. + put_cpu_light();
  8791. list_for_each_entry_safe(fcf, next, &del_list, list) {
  8792. /* Removes fcf from current list */
  8793. diff -Nur linux-4.1.39.orig/drivers/scsi/libfc/fc_exch.c linux-4.1.39/drivers/scsi/libfc/fc_exch.c
  8794. --- linux-4.1.39.orig/drivers/scsi/libfc/fc_exch.c 2017-03-13 21:04:36.000000000 +0100
  8795. +++ linux-4.1.39/drivers/scsi/libfc/fc_exch.c 2017-04-18 17:56:30.593396355 +0200
  8796. @@ -814,10 +814,10 @@
  8797. }
  8798. memset(ep, 0, sizeof(*ep));
  8799. - cpu = get_cpu();
  8800. + cpu = get_cpu_light();
  8801. pool = per_cpu_ptr(mp->pool, cpu);
  8802. spin_lock_bh(&pool->lock);
  8803. - put_cpu();
  8804. + put_cpu_light();
  8805. /* peek cache of free slot */
  8806. if (pool->left != FC_XID_UNKNOWN) {
  8807. diff -Nur linux-4.1.39.orig/drivers/scsi/libsas/sas_ata.c linux-4.1.39/drivers/scsi/libsas/sas_ata.c
  8808. --- linux-4.1.39.orig/drivers/scsi/libsas/sas_ata.c 2017-03-13 21:04:36.000000000 +0100
  8809. +++ linux-4.1.39/drivers/scsi/libsas/sas_ata.c 2017-04-18 17:56:30.593396355 +0200
  8810. @@ -190,7 +190,7 @@
  8811. /* TODO: audit callers to ensure they are ready for qc_issue to
  8812. * unconditionally re-enable interrupts
  8813. */
  8814. - local_irq_save(flags);
  8815. + local_irq_save_nort(flags);
  8816. spin_unlock(ap->lock);
  8817. /* If the device fell off, no sense in issuing commands */
  8818. @@ -255,7 +255,7 @@
  8819. out:
  8820. spin_lock(ap->lock);
  8821. - local_irq_restore(flags);
  8822. + local_irq_restore_nort(flags);
  8823. return ret;
  8824. }
  8825. diff -Nur linux-4.1.39.orig/drivers/scsi/qla2xxx/qla_inline.h linux-4.1.39/drivers/scsi/qla2xxx/qla_inline.h
  8826. --- linux-4.1.39.orig/drivers/scsi/qla2xxx/qla_inline.h 2017-03-13 21:04:36.000000000 +0100
  8827. +++ linux-4.1.39/drivers/scsi/qla2xxx/qla_inline.h 2017-04-18 17:56:30.593396355 +0200
  8828. @@ -59,12 +59,12 @@
  8829. {
  8830. unsigned long flags;
  8831. struct qla_hw_data *ha = rsp->hw;
  8832. - local_irq_save(flags);
  8833. + local_irq_save_nort(flags);
  8834. if (IS_P3P_TYPE(ha))
  8835. qla82xx_poll(0, rsp);
  8836. else
  8837. ha->isp_ops->intr_handler(0, rsp);
  8838. - local_irq_restore(flags);
  8839. + local_irq_restore_nort(flags);
  8840. }
  8841. static inline uint8_t *
  8842. diff -Nur linux-4.1.39.orig/drivers/thermal/x86_pkg_temp_thermal.c linux-4.1.39/drivers/thermal/x86_pkg_temp_thermal.c
  8843. --- linux-4.1.39.orig/drivers/thermal/x86_pkg_temp_thermal.c 2017-03-13 21:04:36.000000000 +0100
  8844. +++ linux-4.1.39/drivers/thermal/x86_pkg_temp_thermal.c 2017-04-18 17:56:30.593396355 +0200
  8845. @@ -29,6 +29,7 @@
  8846. #include <linux/pm.h>
  8847. #include <linux/thermal.h>
  8848. #include <linux/debugfs.h>
  8849. +#include <linux/work-simple.h>
  8850. #include <asm/cpu_device_id.h>
  8851. #include <asm/mce.h>
  8852. @@ -352,7 +353,7 @@
  8853. }
  8854. }
  8855. -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8856. +static void platform_thermal_notify_work(struct swork_event *event)
  8857. {
  8858. unsigned long flags;
  8859. int cpu = smp_processor_id();
  8860. @@ -369,7 +370,7 @@
  8861. pkg_work_scheduled[phy_id]) {
  8862. disable_pkg_thres_interrupt();
  8863. spin_unlock_irqrestore(&pkg_work_lock, flags);
  8864. - return -EINVAL;
  8865. + return;
  8866. }
  8867. pkg_work_scheduled[phy_id] = 1;
  8868. spin_unlock_irqrestore(&pkg_work_lock, flags);
  8869. @@ -378,9 +379,48 @@
  8870. schedule_delayed_work_on(cpu,
  8871. &per_cpu(pkg_temp_thermal_threshold_work, cpu),
  8872. msecs_to_jiffies(notify_delay_ms));
  8873. +}
  8874. +
  8875. +#ifdef CONFIG_PREEMPT_RT_FULL
  8876. +static struct swork_event notify_work;
  8877. +
  8878. +static int thermal_notify_work_init(void)
  8879. +{
  8880. + int err;
  8881. +
  8882. + err = swork_get();
  8883. + if (err)
  8884. + return err;
  8885. +
  8886. + INIT_SWORK(&notify_work, platform_thermal_notify_work);
  8887. return 0;
  8888. }
  8889. +static void thermal_notify_work_cleanup(void)
  8890. +{
  8891. + swork_put();
  8892. +}
  8893. +
  8894. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8895. +{
  8896. + swork_queue(&notify_work);
  8897. + return 0;
  8898. +}
  8899. +
  8900. +#else /* !CONFIG_PREEMPT_RT_FULL */
  8901. +
  8902. +static int thermal_notify_work_init(void) { return 0; }
  8903. +
  8904. +static void thermal_notify_work_cleanup(void) { }
  8905. +
  8906. +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val)
  8907. +{
  8908. + platform_thermal_notify_work(NULL);
  8909. +
  8910. + return 0;
  8911. +}
  8912. +#endif /* CONFIG_PREEMPT_RT_FULL */
  8913. +
  8914. static int find_siblings_cpu(int cpu)
  8915. {
  8916. int i;
  8917. @@ -584,6 +624,9 @@
  8918. if (!x86_match_cpu(pkg_temp_thermal_ids))
  8919. return -ENODEV;
  8920. + if (!thermal_notify_work_init())
  8921. + return -ENODEV;
  8922. +
  8923. spin_lock_init(&pkg_work_lock);
  8924. platform_thermal_package_notify =
  8925. pkg_temp_thermal_platform_thermal_notify;
  8926. @@ -608,7 +651,7 @@
  8927. kfree(pkg_work_scheduled);
  8928. platform_thermal_package_notify = NULL;
  8929. platform_thermal_package_rate_control = NULL;
  8930. -
  8931. + thermal_notify_work_cleanup();
  8932. return -ENODEV;
  8933. }
  8934. @@ -633,6 +676,7 @@
  8935. mutex_unlock(&phy_dev_list_mutex);
  8936. platform_thermal_package_notify = NULL;
  8937. platform_thermal_package_rate_control = NULL;
  8938. + thermal_notify_work_cleanup();
  8939. for_each_online_cpu(i)
  8940. cancel_delayed_work_sync(
  8941. &per_cpu(pkg_temp_thermal_threshold_work, i));
  8942. diff -Nur linux-4.1.39.orig/drivers/tty/serial/8250/8250_core.c linux-4.1.39/drivers/tty/serial/8250/8250_core.c
  8943. --- linux-4.1.39.orig/drivers/tty/serial/8250/8250_core.c 2017-03-13 21:04:36.000000000 +0100
  8944. +++ linux-4.1.39/drivers/tty/serial/8250/8250_core.c 2017-04-18 17:56:30.593396355 +0200
  8945. @@ -36,6 +36,7 @@
  8946. #include <linux/nmi.h>
  8947. #include <linux/mutex.h>
  8948. #include <linux/slab.h>
  8949. +#include <linux/kdb.h>
  8950. #include <linux/uaccess.h>
  8951. #include <linux/pm_runtime.h>
  8952. #ifdef CONFIG_SPARC
  8953. @@ -80,7 +81,16 @@
  8954. #define DEBUG_INTR(fmt...) do { } while (0)
  8955. #endif
  8956. -#define PASS_LIMIT 512
  8957. +/*
  8958. + * On -rt we can have a more delays, and legitimately
  8959. + * so - so don't drop work spuriously and spam the
  8960. + * syslog:
  8961. + */
  8962. +#ifdef CONFIG_PREEMPT_RT_FULL
  8963. +# define PASS_LIMIT 1000000
  8964. +#else
  8965. +# define PASS_LIMIT 512
  8966. +#endif
  8967. #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
  8968. @@ -3366,7 +3376,7 @@
  8969. if (port->sysrq)
  8970. locked = 0;
  8971. - else if (oops_in_progress)
  8972. + else if (oops_in_progress || in_kdb_printk())
  8973. locked = spin_trylock_irqsave(&port->lock, flags);
  8974. else
  8975. spin_lock_irqsave(&port->lock, flags);
  8976. diff -Nur linux-4.1.39.orig/drivers/tty/serial/amba-pl011.c linux-4.1.39/drivers/tty/serial/amba-pl011.c
  8977. --- linux-4.1.39.orig/drivers/tty/serial/amba-pl011.c 2017-03-13 21:04:36.000000000 +0100
  8978. +++ linux-4.1.39/drivers/tty/serial/amba-pl011.c 2017-04-18 17:56:30.597396510 +0200
  8979. @@ -2000,13 +2000,19 @@
  8980. clk_enable(uap->clk);
  8981. - local_irq_save(flags);
  8982. + /*
  8983. + * local_irq_save(flags);
  8984. + *
  8985. + * This local_irq_save() is nonsense. If we come in via sysrq
  8986. + * handling then interrupts are already disabled. Aside of
  8987. + * that the port.sysrq check is racy on SMP regardless.
  8988. + */
  8989. if (uap->port.sysrq)
  8990. locked = 0;
  8991. else if (oops_in_progress)
  8992. - locked = spin_trylock(&uap->port.lock);
  8993. + locked = spin_trylock_irqsave(&uap->port.lock, flags);
  8994. else
  8995. - spin_lock(&uap->port.lock);
  8996. + spin_lock_irqsave(&uap->port.lock, flags);
  8997. /*
  8998. * First save the CR then disable the interrupts
  8999. @@ -2028,8 +2034,7 @@
  9000. writew(old_cr, uap->port.membase + UART011_CR);
  9001. if (locked)
  9002. - spin_unlock(&uap->port.lock);
  9003. - local_irq_restore(flags);
  9004. + spin_unlock_irqrestore(&uap->port.lock, flags);
  9005. clk_disable(uap->clk);
  9006. }
  9007. diff -Nur linux-4.1.39.orig/drivers/tty/serial/omap-serial.c linux-4.1.39/drivers/tty/serial/omap-serial.c
  9008. --- linux-4.1.39.orig/drivers/tty/serial/omap-serial.c 2017-03-13 21:04:36.000000000 +0100
  9009. +++ linux-4.1.39/drivers/tty/serial/omap-serial.c 2017-04-18 17:56:30.597396510 +0200
  9010. @@ -1282,13 +1282,10 @@
  9011. pm_runtime_get_sync(up->dev);
  9012. - local_irq_save(flags);
  9013. - if (up->port.sysrq)
  9014. - locked = 0;
  9015. - else if (oops_in_progress)
  9016. - locked = spin_trylock(&up->port.lock);
  9017. + if (up->port.sysrq || oops_in_progress)
  9018. + locked = spin_trylock_irqsave(&up->port.lock, flags);
  9019. else
  9020. - spin_lock(&up->port.lock);
  9021. + spin_lock_irqsave(&up->port.lock, flags);
  9022. /*
  9023. * First save the IER then disable the interrupts
  9024. @@ -1317,8 +1314,7 @@
  9025. pm_runtime_mark_last_busy(up->dev);
  9026. pm_runtime_put_autosuspend(up->dev);
  9027. if (locked)
  9028. - spin_unlock(&up->port.lock);
  9029. - local_irq_restore(flags);
  9030. + spin_unlock_irqrestore(&up->port.lock, flags);
  9031. }
  9032. static int __init
  9033. diff -Nur linux-4.1.39.orig/drivers/usb/core/hcd.c linux-4.1.39/drivers/usb/core/hcd.c
  9034. --- linux-4.1.39.orig/drivers/usb/core/hcd.c 2017-03-13 21:04:36.000000000 +0100
  9035. +++ linux-4.1.39/drivers/usb/core/hcd.c 2017-04-18 17:56:30.597396510 +0200
  9036. @@ -1684,9 +1684,9 @@
  9037. * and no one may trigger the above deadlock situation when
  9038. * running complete() in tasklet.
  9039. */
  9040. - local_irq_save(flags);
  9041. + local_irq_save_nort(flags);
  9042. urb->complete(urb);
  9043. - local_irq_restore(flags);
  9044. + local_irq_restore_nort(flags);
  9045. usb_anchor_resume_wakeups(anchor);
  9046. atomic_dec(&urb->use_count);
  9047. diff -Nur linux-4.1.39.orig/drivers/usb/gadget/function/f_fs.c linux-4.1.39/drivers/usb/gadget/function/f_fs.c
  9048. --- linux-4.1.39.orig/drivers/usb/gadget/function/f_fs.c 2017-03-13 21:04:36.000000000 +0100
  9049. +++ linux-4.1.39/drivers/usb/gadget/function/f_fs.c 2017-04-18 17:56:30.597396510 +0200
  9050. @@ -1404,7 +1404,7 @@
  9051. pr_info("%s(): freeing\n", __func__);
  9052. ffs_data_clear(ffs);
  9053. BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
  9054. - waitqueue_active(&ffs->ep0req_completion.wait));
  9055. + swaitqueue_active(&ffs->ep0req_completion.wait));
  9056. kfree(ffs->dev_name);
  9057. kfree(ffs);
  9058. }
  9059. diff -Nur linux-4.1.39.orig/drivers/usb/gadget/legacy/inode.c linux-4.1.39/drivers/usb/gadget/legacy/inode.c
  9060. --- linux-4.1.39.orig/drivers/usb/gadget/legacy/inode.c 2017-03-13 21:04:36.000000000 +0100
  9061. +++ linux-4.1.39/drivers/usb/gadget/legacy/inode.c 2017-04-18 17:56:30.597396510 +0200
  9062. @@ -345,7 +345,7 @@
  9063. spin_unlock_irq (&epdata->dev->lock);
  9064. if (likely (value == 0)) {
  9065. - value = wait_event_interruptible (done.wait, done.done);
  9066. + value = swait_event_interruptible (done.wait, done.done);
  9067. if (value != 0) {
  9068. spin_lock_irq (&epdata->dev->lock);
  9069. if (likely (epdata->ep != NULL)) {
  9070. @@ -354,7 +354,7 @@
  9071. usb_ep_dequeue (epdata->ep, epdata->req);
  9072. spin_unlock_irq (&epdata->dev->lock);
  9073. - wait_event (done.wait, done.done);
  9074. + swait_event (done.wait, done.done);
  9075. if (epdata->status == -ECONNRESET)
  9076. epdata->status = -EINTR;
  9077. } else {
  9078. diff -Nur linux-4.1.39.orig/drivers/usb/gadget/udc/atmel_usba_udc.c linux-4.1.39/drivers/usb/gadget/udc/atmel_usba_udc.c
  9079. --- linux-4.1.39.orig/drivers/usb/gadget/udc/atmel_usba_udc.c 2017-03-13 21:04:36.000000000 +0100
  9080. +++ linux-4.1.39/drivers/usb/gadget/udc/atmel_usba_udc.c 2017-04-18 17:56:30.597396510 +0200
  9081. @@ -17,7 +17,9 @@
  9082. #include <linux/device.h>
  9083. #include <linux/dma-mapping.h>
  9084. #include <linux/list.h>
  9085. +#include <linux/mfd/syscon.h>
  9086. #include <linux/platform_device.h>
  9087. +#include <linux/regmap.h>
  9088. #include <linux/usb/ch9.h>
  9089. #include <linux/usb/gadget.h>
  9090. #include <linux/usb/atmel_usba_udc.h>
  9091. @@ -1889,20 +1891,15 @@
  9092. #ifdef CONFIG_OF
  9093. static void at91sam9rl_toggle_bias(struct usba_udc *udc, int is_on)
  9094. {
  9095. - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
  9096. -
  9097. - if (is_on)
  9098. - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
  9099. - else
  9100. - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
  9101. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
  9102. + is_on ? AT91_PMC_BIASEN : 0);
  9103. }
  9104. static void at91sam9g45_pulse_bias(struct usba_udc *udc)
  9105. {
  9106. - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
  9107. -
  9108. - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
  9109. - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
  9110. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, 0);
  9111. + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
  9112. + AT91_PMC_BIASEN);
  9113. }
  9114. static const struct usba_udc_errata at91sam9rl_errata = {
  9115. @@ -1939,6 +1936,9 @@
  9116. return ERR_PTR(-EINVAL);
  9117. udc->errata = match->data;
  9118. + udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
  9119. + if (udc->errata && IS_ERR(udc->pmc))
  9120. + return ERR_CAST(udc->pmc);
  9121. udc->num_ep = 0;
  9122. diff -Nur linux-4.1.39.orig/drivers/usb/gadget/udc/atmel_usba_udc.h linux-4.1.39/drivers/usb/gadget/udc/atmel_usba_udc.h
  9123. --- linux-4.1.39.orig/drivers/usb/gadget/udc/atmel_usba_udc.h 2017-03-13 21:04:36.000000000 +0100
  9124. +++ linux-4.1.39/drivers/usb/gadget/udc/atmel_usba_udc.h 2017-04-18 17:56:30.597396510 +0200
  9125. @@ -354,6 +354,8 @@
  9126. struct dentry *debugfs_root;
  9127. struct dentry *debugfs_regs;
  9128. #endif
  9129. +
  9130. + struct regmap *pmc;
  9131. };
  9132. static inline struct usba_ep *to_usba_ep(struct usb_ep *ep)
  9133. diff -Nur linux-4.1.39.orig/fs/aio.c linux-4.1.39/fs/aio.c
  9134. --- linux-4.1.39.orig/fs/aio.c 2017-03-13 21:04:36.000000000 +0100
  9135. +++ linux-4.1.39/fs/aio.c 2017-04-18 17:56:30.597396510 +0200
  9136. @@ -40,6 +40,7 @@
  9137. #include <linux/ramfs.h>
  9138. #include <linux/percpu-refcount.h>
  9139. #include <linux/mount.h>
  9140. +#include <linux/work-simple.h>
  9141. #include <asm/kmap_types.h>
  9142. #include <asm/uaccess.h>
  9143. @@ -115,7 +116,7 @@
  9144. struct page **ring_pages;
  9145. long nr_pages;
  9146. - struct work_struct free_work;
  9147. + struct swork_event free_work;
  9148. /*
  9149. * signals when all in-flight requests are done
  9150. @@ -253,6 +254,7 @@
  9151. .mount = aio_mount,
  9152. .kill_sb = kill_anon_super,
  9153. };
  9154. + BUG_ON(swork_get());
  9155. aio_mnt = kern_mount(&aio_fs);
  9156. if (IS_ERR(aio_mnt))
  9157. panic("Failed to create aio fs mount.");
  9158. @@ -559,9 +561,9 @@
  9159. return cancel(&kiocb->common);
  9160. }
  9161. -static void free_ioctx(struct work_struct *work)
  9162. +static void free_ioctx(struct swork_event *sev)
  9163. {
  9164. - struct kioctx *ctx = container_of(work, struct kioctx, free_work);
  9165. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  9166. pr_debug("freeing %p\n", ctx);
  9167. @@ -580,8 +582,8 @@
  9168. if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
  9169. complete(&ctx->rq_wait->comp);
  9170. - INIT_WORK(&ctx->free_work, free_ioctx);
  9171. - schedule_work(&ctx->free_work);
  9172. + INIT_SWORK(&ctx->free_work, free_ioctx);
  9173. + swork_queue(&ctx->free_work);
  9174. }
  9175. /*
  9176. @@ -589,9 +591,9 @@
  9177. * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  9178. * now it's safe to cancel any that need to be.
  9179. */
  9180. -static void free_ioctx_users(struct percpu_ref *ref)
  9181. +static void free_ioctx_users_work(struct swork_event *sev)
  9182. {
  9183. - struct kioctx *ctx = container_of(ref, struct kioctx, users);
  9184. + struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
  9185. struct aio_kiocb *req;
  9186. spin_lock_irq(&ctx->ctx_lock);
  9187. @@ -610,6 +612,14 @@
  9188. percpu_ref_put(&ctx->reqs);
  9189. }
  9190. +static void free_ioctx_users(struct percpu_ref *ref)
  9191. +{
  9192. + struct kioctx *ctx = container_of(ref, struct kioctx, users);
  9193. +
  9194. + INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
  9195. + swork_queue(&ctx->free_work);
  9196. +}
  9197. +
  9198. static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  9199. {
  9200. unsigned i, new_nr;
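The fs/aio.c conversion above shows the general pattern this patch uses to move deferred work onto the RT simple workqueue: the subsystem calls swork_get() once during init (the BUG_ON(swork_get()) added to the aio setup path), and each deferral becomes INIT_SWORK() plus swork_queue() instead of INIT_WORK() plus schedule_work(). A condensed sketch of that pattern, assuming the work-simple.h API this patch introduces:

	#include <linux/work-simple.h>

	static struct swork_event demo_event;

	static void demo_fn(struct swork_event *sev)
	{
		/* runs in the preemptible simple-workqueue worker thread */
	}

	static void demo_defer(void)
	{
		INIT_SWORK(&demo_event, demo_fn);
		swork_queue(&demo_event);
	}
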
  9201. diff -Nur linux-4.1.39.orig/fs/autofs4/autofs_i.h linux-4.1.39/fs/autofs4/autofs_i.h
  9202. --- linux-4.1.39.orig/fs/autofs4/autofs_i.h 2017-03-13 21:04:36.000000000 +0100
  9203. +++ linux-4.1.39/fs/autofs4/autofs_i.h 2017-04-18 17:56:30.597396510 +0200
  9204. @@ -34,6 +34,7 @@
  9205. #include <linux/sched.h>
  9206. #include <linux/mount.h>
  9207. #include <linux/namei.h>
  9208. +#include <linux/delay.h>
  9209. #include <asm/current.h>
  9210. #include <asm/uaccess.h>
  9211. diff -Nur linux-4.1.39.orig/fs/autofs4/expire.c linux-4.1.39/fs/autofs4/expire.c
  9212. --- linux-4.1.39.orig/fs/autofs4/expire.c 2017-03-13 21:04:36.000000000 +0100
  9213. +++ linux-4.1.39/fs/autofs4/expire.c 2017-04-18 17:56:30.597396510 +0200
  9214. @@ -150,7 +150,7 @@
  9215. parent = p->d_parent;
  9216. if (!spin_trylock(&parent->d_lock)) {
  9217. spin_unlock(&p->d_lock);
  9218. - cpu_relax();
  9219. + cpu_chill();
  9220. goto relock;
  9221. }
  9222. spin_unlock(&p->d_lock);
  9223. diff -Nur linux-4.1.39.orig/fs/buffer.c linux-4.1.39/fs/buffer.c
  9224. --- linux-4.1.39.orig/fs/buffer.c 2017-03-13 21:04:36.000000000 +0100
  9225. +++ linux-4.1.39/fs/buffer.c 2017-04-18 17:56:30.597396510 +0200
  9226. @@ -301,8 +301,7 @@
  9227. * decide that the page is now completely done.
  9228. */
  9229. first = page_buffers(page);
  9230. - local_irq_save(flags);
  9231. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  9232. + flags = bh_uptodate_lock_irqsave(first);
  9233. clear_buffer_async_read(bh);
  9234. unlock_buffer(bh);
  9235. tmp = bh;
  9236. @@ -315,8 +314,7 @@
  9237. }
  9238. tmp = tmp->b_this_page;
  9239. } while (tmp != bh);
  9240. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9241. - local_irq_restore(flags);
  9242. + bh_uptodate_unlock_irqrestore(first, flags);
  9243. /*
  9244. * If none of the buffers had errors and they are all
  9245. @@ -328,9 +326,7 @@
  9246. return;
  9247. still_busy:
  9248. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9249. - local_irq_restore(flags);
  9250. - return;
  9251. + bh_uptodate_unlock_irqrestore(first, flags);
  9252. }
  9253. /*
  9254. @@ -358,8 +354,7 @@
  9255. }
  9256. first = page_buffers(page);
  9257. - local_irq_save(flags);
  9258. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  9259. + flags = bh_uptodate_lock_irqsave(first);
  9260. clear_buffer_async_write(bh);
  9261. unlock_buffer(bh);
  9262. @@ -371,15 +366,12 @@
  9263. }
  9264. tmp = tmp->b_this_page;
  9265. }
  9266. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9267. - local_irq_restore(flags);
  9268. + bh_uptodate_unlock_irqrestore(first, flags);
  9269. end_page_writeback(page);
  9270. return;
  9271. still_busy:
  9272. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9273. - local_irq_restore(flags);
  9274. - return;
  9275. + bh_uptodate_unlock_irqrestore(first, flags);
  9276. }
  9277. EXPORT_SYMBOL(end_buffer_async_write);
  9278. @@ -3325,6 +3317,7 @@
  9279. struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
  9280. if (ret) {
  9281. INIT_LIST_HEAD(&ret->b_assoc_buffers);
  9282. + buffer_head_init_locks(ret);
  9283. preempt_disable();
  9284. __this_cpu_inc(bh_accounting.nr);
  9285. recalc_bh_state();
  9286. diff -Nur linux-4.1.39.orig/fs/dcache.c linux-4.1.39/fs/dcache.c
  9287. --- linux-4.1.39.orig/fs/dcache.c 2017-03-13 21:04:36.000000000 +0100
  9288. +++ linux-4.1.39/fs/dcache.c 2017-04-18 17:56:30.597396510 +0200
  9289. @@ -19,6 +19,7 @@
  9290. #include <linux/mm.h>
  9291. #include <linux/fs.h>
  9292. #include <linux/fsnotify.h>
  9293. +#include <linux/delay.h>
  9294. #include <linux/slab.h>
  9295. #include <linux/init.h>
  9296. #include <linux/hash.h>
  9297. @@ -747,6 +748,8 @@
  9298. */
  9299. void dput(struct dentry *dentry)
  9300. {
  9301. + struct dentry *parent;
  9302. +
  9303. if (unlikely(!dentry))
  9304. return;
  9305. @@ -783,9 +786,18 @@
  9306. return;
  9307. kill_it:
  9308. - dentry = dentry_kill(dentry);
  9309. - if (dentry) {
  9310. - cond_resched();
  9311. + parent = dentry_kill(dentry);
  9312. + if (parent) {
  9313. + int r;
  9314. +
  9315. + if (parent == dentry) {
  9316. + /* the task with the highest priority won't schedule */
  9317. + r = cond_resched();
  9318. + if (!r)
  9319. + cpu_chill();
  9320. + } else {
  9321. + dentry = parent;
  9322. + }
  9323. goto repeat;
  9324. }
  9325. }
  9326. @@ -2394,7 +2406,7 @@
  9327. if (dentry->d_lockref.count == 1) {
  9328. if (!spin_trylock(&inode->i_lock)) {
  9329. spin_unlock(&dentry->d_lock);
  9330. - cpu_relax();
  9331. + cpu_chill();
  9332. goto again;
  9333. }
  9334. dentry->d_flags &= ~DCACHE_CANT_MOUNT;
  9335. diff -Nur linux-4.1.39.orig/fs/eventpoll.c linux-4.1.39/fs/eventpoll.c
  9336. --- linux-4.1.39.orig/fs/eventpoll.c 2017-03-13 21:04:36.000000000 +0100
  9337. +++ linux-4.1.39/fs/eventpoll.c 2017-04-18 17:56:30.601396665 +0200
  9338. @@ -505,12 +505,12 @@
  9339. */
  9340. static void ep_poll_safewake(wait_queue_head_t *wq)
  9341. {
  9342. - int this_cpu = get_cpu();
  9343. + int this_cpu = get_cpu_light();
  9344. ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
  9345. ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
  9346. - put_cpu();
  9347. + put_cpu_light();
  9348. }
  9349. static void ep_remove_wait_queue(struct eppoll_entry *pwq)
  9350. diff -Nur linux-4.1.39.orig/fs/exec.c linux-4.1.39/fs/exec.c
  9351. --- linux-4.1.39.orig/fs/exec.c 2017-03-13 21:04:36.000000000 +0100
  9352. +++ linux-4.1.39/fs/exec.c 2017-04-18 17:56:30.601396665 +0200
  9353. @@ -859,12 +859,14 @@
  9354. }
  9355. }
  9356. task_lock(tsk);
  9357. + preempt_disable_rt();
  9358. active_mm = tsk->active_mm;
  9359. tsk->mm = mm;
  9360. tsk->active_mm = mm;
  9361. activate_mm(active_mm, mm);
  9362. tsk->mm->vmacache_seqnum = 0;
  9363. vmacache_flush(tsk);
  9364. + preempt_enable_rt();
  9365. task_unlock(tsk);
  9366. if (old_mm) {
  9367. up_read(&old_mm->mmap_sem);
  9368. diff -Nur linux-4.1.39.orig/fs/f2fs/f2fs.h linux-4.1.39/fs/f2fs/f2fs.h
  9369. --- linux-4.1.39.orig/fs/f2fs/f2fs.h 2017-03-13 21:04:36.000000000 +0100
  9370. +++ linux-4.1.39/fs/f2fs/f2fs.h 2017-04-18 17:56:30.601396665 +0200
  9371. @@ -22,7 +22,6 @@
  9372. #ifdef CONFIG_F2FS_CHECK_FS
  9373. #define f2fs_bug_on(sbi, condition) BUG_ON(condition)
  9374. -#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
  9375. #else
  9376. #define f2fs_bug_on(sbi, condition) \
  9377. do { \
  9378. @@ -31,7 +30,6 @@
  9379. set_sbi_flag(sbi, SBI_NEED_FSCK); \
  9380. } \
  9381. } while (0)
  9382. -#define f2fs_down_write(x, y) down_write(x)
  9383. #endif
  9384. /*
  9385. @@ -838,7 +836,7 @@
  9386. static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
  9387. {
  9388. - f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
  9389. + down_write(&sbi->cp_rwsem);
  9390. }
  9391. static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
  9392. diff -Nur linux-4.1.39.orig/fs/jbd/checkpoint.c linux-4.1.39/fs/jbd/checkpoint.c
  9393. --- linux-4.1.39.orig/fs/jbd/checkpoint.c 2017-03-13 21:04:36.000000000 +0100
  9394. +++ linux-4.1.39/fs/jbd/checkpoint.c 2017-04-18 17:56:30.601396665 +0200
  9395. @@ -129,6 +129,8 @@
  9396. if (journal->j_flags & JFS_ABORT)
  9397. return;
  9398. spin_unlock(&journal->j_state_lock);
  9399. + if (current->plug)
  9400. + io_schedule();
  9401. mutex_lock(&journal->j_checkpoint_mutex);
  9402. /*
  9403. diff -Nur linux-4.1.39.orig/fs/jbd2/checkpoint.c linux-4.1.39/fs/jbd2/checkpoint.c
  9404. --- linux-4.1.39.orig/fs/jbd2/checkpoint.c 2017-03-13 21:04:36.000000000 +0100
  9405. +++ linux-4.1.39/fs/jbd2/checkpoint.c 2017-04-18 17:56:30.601396665 +0200
  9406. @@ -116,6 +116,8 @@
  9407. nblocks = jbd2_space_needed(journal);
  9408. while (jbd2_log_space_left(journal) < nblocks) {
  9409. write_unlock(&journal->j_state_lock);
  9410. + if (current->plug)
  9411. + io_schedule();
  9412. mutex_lock(&journal->j_checkpoint_mutex);
  9413. /*
  9414. diff -Nur linux-4.1.39.orig/fs/namespace.c linux-4.1.39/fs/namespace.c
  9415. --- linux-4.1.39.orig/fs/namespace.c 2017-03-13 21:04:36.000000000 +0100
  9416. +++ linux-4.1.39/fs/namespace.c 2017-04-18 17:56:30.601396665 +0200
  9417. @@ -14,6 +14,7 @@
  9418. #include <linux/mnt_namespace.h>
  9419. #include <linux/user_namespace.h>
  9420. #include <linux/namei.h>
  9421. +#include <linux/delay.h>
  9422. #include <linux/security.h>
  9423. #include <linux/idr.h>
  9424. #include <linux/init.h> /* init_rootfs */
  9425. @@ -356,8 +357,11 @@
  9426. * incremented count after it has set MNT_WRITE_HOLD.
  9427. */
  9428. smp_mb();
  9429. - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
  9430. - cpu_relax();
  9431. + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
  9432. + preempt_enable();
  9433. + cpu_chill();
  9434. + preempt_disable();
  9435. + }
  9436. /*
  9437. * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  9438. * be set to match its requirements. So we must not load that until
  9439. diff -Nur linux-4.1.39.orig/fs/ntfs/aops.c linux-4.1.39/fs/ntfs/aops.c
  9440. --- linux-4.1.39.orig/fs/ntfs/aops.c 2017-03-13 21:04:36.000000000 +0100
  9441. +++ linux-4.1.39/fs/ntfs/aops.c 2017-04-18 17:56:30.601396665 +0200
  9442. @@ -107,8 +107,7 @@
  9443. "0x%llx.", (unsigned long long)bh->b_blocknr);
  9444. }
  9445. first = page_buffers(page);
  9446. - local_irq_save(flags);
  9447. - bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
  9448. + flags = bh_uptodate_lock_irqsave(first);
  9449. clear_buffer_async_read(bh);
  9450. unlock_buffer(bh);
  9451. tmp = bh;
  9452. @@ -123,8 +122,7 @@
  9453. }
  9454. tmp = tmp->b_this_page;
  9455. } while (tmp != bh);
  9456. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9457. - local_irq_restore(flags);
  9458. + bh_uptodate_unlock_irqrestore(first, flags);
  9459. /*
  9460. * If none of the buffers had errors then we can set the page uptodate,
  9461. * but we first have to perform the post read mst fixups, if the
  9462. @@ -145,13 +143,13 @@
  9463. recs = PAGE_CACHE_SIZE / rec_size;
  9464. /* Should have been verified before we got here... */
  9465. BUG_ON(!recs);
  9466. - local_irq_save(flags);
  9467. + local_irq_save_nort(flags);
  9468. kaddr = kmap_atomic(page);
  9469. for (i = 0; i < recs; i++)
  9470. post_read_mst_fixup((NTFS_RECORD*)(kaddr +
  9471. i * rec_size), rec_size);
  9472. kunmap_atomic(kaddr);
  9473. - local_irq_restore(flags);
  9474. + local_irq_restore_nort(flags);
  9475. flush_dcache_page(page);
  9476. if (likely(page_uptodate && !PageError(page)))
  9477. SetPageUptodate(page);
  9478. @@ -159,9 +157,7 @@
  9479. unlock_page(page);
  9480. return;
  9481. still_busy:
  9482. - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
  9483. - local_irq_restore(flags);
  9484. - return;
  9485. + bh_uptodate_unlock_irqrestore(first, flags);
  9486. }
  9487. /**
  9488. diff -Nur linux-4.1.39.orig/fs/timerfd.c linux-4.1.39/fs/timerfd.c
  9489. --- linux-4.1.39.orig/fs/timerfd.c 2017-03-13 21:04:36.000000000 +0100
  9490. +++ linux-4.1.39/fs/timerfd.c 2017-04-18 17:56:30.601396665 +0200
  9491. @@ -450,7 +450,10 @@
  9492. break;
  9493. }
  9494. spin_unlock_irq(&ctx->wqh.lock);
  9495. - cpu_relax();
  9496. + if (isalarm(ctx))
  9497. + hrtimer_wait_for_timer(&ctx->t.alarm.timer);
  9498. + else
  9499. + hrtimer_wait_for_timer(&ctx->t.tmr);
  9500. }
  9501. /*
  9502. diff -Nur linux-4.1.39.orig/fs/xfs/xfs_inode.c linux-4.1.39/fs/xfs/xfs_inode.c
  9503. --- linux-4.1.39.orig/fs/xfs/xfs_inode.c 2017-03-13 21:04:36.000000000 +0100
  9504. +++ linux-4.1.39/fs/xfs/xfs_inode.c 2017-04-18 17:56:30.601396665 +0200
  9505. @@ -164,7 +164,7 @@
  9506. (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
  9507. ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
  9508. (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
  9509. - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
  9510. + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
  9511. if (lock_flags & XFS_IOLOCK_EXCL)
  9512. mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
  9513. @@ -212,7 +212,7 @@
  9514. (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
  9515. ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
  9516. (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
  9517. - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
  9518. + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
  9519. if (lock_flags & XFS_IOLOCK_EXCL) {
  9520. if (!mrtryupdate(&ip->i_iolock))
  9521. @@ -281,7 +281,7 @@
  9522. (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
  9523. ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
  9524. (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
  9525. - ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
  9526. + ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
  9527. ASSERT(lock_flags != 0);
  9528. if (lock_flags & XFS_IOLOCK_EXCL)
  9529. @@ -364,30 +364,38 @@
  9530. /*
  9531. * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
  9532. - * value. This shouldn't be called for page fault locking, but we also need to
  9533. - * ensure we don't overrun the number of lockdep subclasses for the iolock or
  9534. - * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
  9535. + * value. This can be called for any type of inode lock combination, including
  9536. + * parent locking. Care must be taken to ensure we don't overrun the subclass
  9537. + * storage fields in the class mask we build.
  9538. */
  9539. static inline int
  9540. xfs_lock_inumorder(int lock_mode, int subclass)
  9541. {
  9542. + int class = 0;
  9543. +
  9544. + ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
  9545. + XFS_ILOCK_RTSUM)));
  9546. +
  9547. if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
  9548. - ASSERT(subclass + XFS_LOCK_INUMORDER <
  9549. - (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
  9550. - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
  9551. + ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
  9552. + ASSERT(subclass + XFS_IOLOCK_PARENT_VAL <
  9553. + MAX_LOCKDEP_SUBCLASSES);
  9554. + class += subclass << XFS_IOLOCK_SHIFT;
  9555. + if (lock_mode & XFS_IOLOCK_PARENT)
  9556. + class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
  9557. }
  9558. if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
  9559. - ASSERT(subclass + XFS_LOCK_INUMORDER <
  9560. - (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
  9561. - lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
  9562. - XFS_MMAPLOCK_SHIFT;
  9563. + ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
  9564. + class += subclass << XFS_MMAPLOCK_SHIFT;
  9565. }
  9566. - if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
  9567. - lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
  9568. + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
  9569. + ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
  9570. + class += subclass << XFS_ILOCK_SHIFT;
  9571. + }
  9572. - return lock_mode;
  9573. + return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
  9574. }
  9575. /*
  9576. @@ -399,6 +407,11 @@
  9577. * transaction (such as truncate). This can result in deadlock since the long
  9578. * running trans might need to wait for the inode we just locked in order to
  9579. * push the tail and free space in the log.
  9580. + *
  9581. + * xfs_lock_inodes() can only be used to lock one type of lock at a time -
9582. + * the iolock, the mmaplock or the ilock, but not a combination of them. If we
  9583. + * lock more than one at a time, lockdep will report false positives saying we
  9584. + * have violated locking orders.
  9585. */
  9586. void
  9587. xfs_lock_inodes(
  9588. @@ -409,8 +422,29 @@
  9589. int attempts = 0, i, j, try_lock;
  9590. xfs_log_item_t *lp;
  9591. - /* currently supports between 2 and 5 inodes */
  9592. + /*
  9593. + * Currently supports between 2 and 5 inodes with exclusive locking. We
  9594. + * support an arbitrary depth of locking here, but absolute limits on
9595. + * inodes depend on the type of locking and the limits placed by
  9596. + * lockdep annotations in xfs_lock_inumorder. These are all checked by
  9597. + * the asserts.
  9598. + */
  9599. ASSERT(ips && inodes >= 2 && inodes <= 5);
  9600. + ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
  9601. + XFS_ILOCK_EXCL));
  9602. + ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
  9603. + XFS_ILOCK_SHARED)));
  9604. + ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
  9605. + inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
  9606. + ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
  9607. + inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
  9608. + ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
  9609. + inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
  9610. +
  9611. + if (lock_mode & XFS_IOLOCK_EXCL) {
  9612. + ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
  9613. + } else if (lock_mode & XFS_MMAPLOCK_EXCL)
  9614. + ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
  9615. try_lock = 0;
  9616. i = 0;
  9617. diff -Nur linux-4.1.39.orig/fs/xfs/xfs_inode.h linux-4.1.39/fs/xfs/xfs_inode.h
  9618. --- linux-4.1.39.orig/fs/xfs/xfs_inode.h 2017-03-13 21:04:36.000000000 +0100
  9619. +++ linux-4.1.39/fs/xfs/xfs_inode.h 2017-04-18 17:56:30.601396665 +0200
  9620. @@ -284,9 +284,9 @@
  9621. * Flags for lockdep annotations.
  9622. *
  9623. * XFS_LOCK_PARENT - for directory operations that require locking a
  9624. - * parent directory inode and a child entry inode. The parent gets locked
  9625. - * with this flag so it gets a lockdep subclass of 1 and the child entry
  9626. - * lock will have a lockdep subclass of 0.
  9627. + * parent directory inode and a child entry inode. IOLOCK requires nesting,
  9628. + * MMAPLOCK does not support this class, ILOCK requires a single subclass
  9629. + * to differentiate parent from child.
  9630. *
  9631. * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
  9632. * inodes do not participate in the normal lock order, and thus have their
  9633. @@ -295,30 +295,63 @@
9634. * XFS_LOCK_INUMORDER - for locking several inodes at the same time
  9635. * with xfs_lock_inodes(). This flag is used as the starting subclass
  9636. * and each subsequent lock acquired will increment the subclass by one.
  9637. - * So the first lock acquired will have a lockdep subclass of 4, the
  9638. - * second lock will have a lockdep subclass of 5, and so on. It is
  9639. - * the responsibility of the class builder to shift this to the correct
  9640. - * portion of the lock_mode lockdep mask.
  9641. + * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
  9642. + * limited to the subclasses we can represent via nesting. We need at least
  9643. + * 5 inodes nest depth for the ILOCK through rename, and we also have to support
  9644. + * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
  9645. + * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
  9646. + * 8 subclasses supported by lockdep.
  9647. + *
  9648. + * This also means we have to number the sub-classes in the lowest bits of
  9649. + * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
  9650. + * mask and we can't use bit-masking to build the subclasses. What a mess.
  9651. + *
  9652. + * Bit layout:
  9653. + *
  9654. + * Bit Lock Region
  9655. + * 16-19 XFS_IOLOCK_SHIFT dependencies
  9656. + * 20-23 XFS_MMAPLOCK_SHIFT dependencies
  9657. + * 24-31 XFS_ILOCK_SHIFT dependencies
  9658. + *
  9659. + * IOLOCK values
  9660. + *
  9661. + * 0-3 subclass value
  9662. + * 4-7 PARENT subclass values
  9663. + *
  9664. + * MMAPLOCK values
  9665. + *
  9666. + * 0-3 subclass value
  9667. + * 4-7 unused
  9668. + *
  9669. + * ILOCK values
  9670. + * 0-4 subclass values
  9671. + * 5 PARENT subclass (not nestable)
  9672. + * 6 RTBITMAP subclass (not nestable)
  9673. + * 7 RTSUM subclass (not nestable)
  9674. + *
  9675. */
  9676. -#define XFS_LOCK_PARENT 1
  9677. -#define XFS_LOCK_RTBITMAP 2
  9678. -#define XFS_LOCK_RTSUM 3
  9679. -#define XFS_LOCK_INUMORDER 4
  9680. -
  9681. -#define XFS_IOLOCK_SHIFT 16
  9682. -#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
  9683. +#define XFS_IOLOCK_SHIFT 16
  9684. +#define XFS_IOLOCK_PARENT_VAL 4
  9685. +#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
  9686. +#define XFS_IOLOCK_DEP_MASK 0x000f0000
  9687. +#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
  9688. -#define XFS_MMAPLOCK_SHIFT 20
  9689. +#define XFS_MMAPLOCK_SHIFT 20
  9690. +#define XFS_MMAPLOCK_NUMORDER 0
  9691. +#define XFS_MMAPLOCK_MAX_SUBCLASS 3
  9692. +#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
  9693. -#define XFS_ILOCK_SHIFT 24
  9694. -#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
  9695. -#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
  9696. -#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
  9697. +#define XFS_ILOCK_SHIFT 24
  9698. +#define XFS_ILOCK_PARENT_VAL 5
  9699. +#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1)
  9700. +#define XFS_ILOCK_RTBITMAP_VAL 6
  9701. +#define XFS_ILOCK_RTSUM_VAL 7
  9702. +#define XFS_ILOCK_DEP_MASK 0xff000000
  9703. +#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
  9704. +#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
  9705. +#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
  9706. -#define XFS_IOLOCK_DEP_MASK 0x000f0000
  9707. -#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
  9708. -#define XFS_ILOCK_DEP_MASK 0xff000000
  9709. -#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
  9710. +#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
  9711. XFS_MMAPLOCK_DEP_MASK | \
  9712. XFS_ILOCK_DEP_MASK)
  9713. diff -Nur linux-4.1.39.orig/include/acpi/platform/aclinux.h linux-4.1.39/include/acpi/platform/aclinux.h
  9714. --- linux-4.1.39.orig/include/acpi/platform/aclinux.h 2017-03-13 21:04:36.000000000 +0100
  9715. +++ linux-4.1.39/include/acpi/platform/aclinux.h 2017-04-18 17:56:30.601396665 +0200
  9716. @@ -123,6 +123,7 @@
  9717. #define acpi_cache_t struct kmem_cache
  9718. #define acpi_spinlock spinlock_t *
  9719. +#define acpi_raw_spinlock raw_spinlock_t *
  9720. #define acpi_cpu_flags unsigned long
  9721. /* Use native linux version of acpi_os_allocate_zeroed */
  9722. @@ -141,6 +142,20 @@
  9723. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
  9724. #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
  9725. +#define acpi_os_create_raw_lock(__handle) \
  9726. +({ \
  9727. + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
  9728. + \
  9729. + if (lock) { \
  9730. + *(__handle) = lock; \
  9731. + raw_spin_lock_init(*(__handle)); \
  9732. + } \
  9733. + lock ? AE_OK : AE_NO_MEMORY; \
  9734. + })
  9735. +
  9736. +#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
  9737. +
  9738. +
  9739. /*
  9740. * OSL interfaces used by debugger/disassembler
  9741. */
  9742. diff -Nur linux-4.1.39.orig/include/asm-generic/bug.h linux-4.1.39/include/asm-generic/bug.h
  9743. --- linux-4.1.39.orig/include/asm-generic/bug.h 2017-03-13 21:04:36.000000000 +0100
  9744. +++ linux-4.1.39/include/asm-generic/bug.h 2017-04-18 17:56:30.601396665 +0200
  9745. @@ -206,6 +206,20 @@
  9746. # define WARN_ON_SMP(x) ({0;})
  9747. #endif
  9748. +#ifdef CONFIG_PREEMPT_RT_BASE
  9749. +# define BUG_ON_RT(c) BUG_ON(c)
  9750. +# define BUG_ON_NONRT(c) do { } while (0)
  9751. +# define WARN_ON_RT(condition) WARN_ON(condition)
  9752. +# define WARN_ON_NONRT(condition) do { } while (0)
  9753. +# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
  9754. +#else
  9755. +# define BUG_ON_RT(c) do { } while (0)
  9756. +# define BUG_ON_NONRT(c) BUG_ON(c)
  9757. +# define WARN_ON_RT(condition) do { } while (0)
  9758. +# define WARN_ON_NONRT(condition) WARN_ON(condition)
  9759. +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
  9760. +#endif
  9761. +
  9762. #endif /* __ASSEMBLY__ */
  9763. #endif
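The *_RT/*_NONRT assertion variants added above let code keep an invariant check only on the configuration where it still holds. A usage sketch (not taken from this patch): on mainline spin_lock_irq() disables interrupts, while with PREEMPT_RT_BASE the lock may sleep and interrupts stay enabled, so the check is confined to non-RT builds:

	static DEFINE_SPINLOCK(demo_lock);

	static void demo(void)
	{
		spin_lock_irq(&demo_lock);
		/* Only meaningful where the lock really disabled interrupts: */
		WARN_ON_NONRT(!irqs_disabled());
		spin_unlock_irq(&demo_lock);
	}
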
  9764. diff -Nur linux-4.1.39.orig/include/asm-generic/futex.h linux-4.1.39/include/asm-generic/futex.h
  9765. --- linux-4.1.39.orig/include/asm-generic/futex.h 2017-03-13 21:04:36.000000000 +0100
  9766. +++ linux-4.1.39/include/asm-generic/futex.h 2017-04-18 17:56:30.601396665 +0200
  9767. @@ -8,8 +8,7 @@
  9768. #ifndef CONFIG_SMP
  9769. /*
  9770. * The following implementation only for uniprocessor machines.
  9771. - * For UP, it's relies on the fact that pagefault_disable() also disables
  9772. - * preemption to ensure mutual exclusion.
  9773. + * It relies on preempt_disable() ensuring mutual exclusion.
  9774. *
  9775. */
  9776. @@ -38,6 +37,7 @@
  9777. if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
  9778. oparg = 1 << oparg;
  9779. + preempt_disable();
  9780. pagefault_disable();
  9781. ret = -EFAULT;
  9782. @@ -72,6 +72,7 @@
  9783. out_pagefault_enable:
  9784. pagefault_enable();
  9785. + preempt_enable();
  9786. if (ret == 0) {
  9787. switch (cmp) {
  9788. @@ -106,6 +107,7 @@
  9789. {
  9790. u32 val;
  9791. + preempt_disable();
  9792. if (unlikely(get_user(val, uaddr) != 0))
  9793. return -EFAULT;
  9794. @@ -113,6 +115,7 @@
  9795. return -EFAULT;
  9796. *uval = val;
  9797. + preempt_enable();
  9798. return 0;
  9799. }
  9800. diff -Nur linux-4.1.39.orig/include/asm-generic/preempt.h linux-4.1.39/include/asm-generic/preempt.h
  9801. --- linux-4.1.39.orig/include/asm-generic/preempt.h 2017-03-13 21:04:36.000000000 +0100
  9802. +++ linux-4.1.39/include/asm-generic/preempt.h 2017-04-18 17:56:30.601396665 +0200
  9803. @@ -7,10 +7,10 @@
  9804. static __always_inline int preempt_count(void)
  9805. {
  9806. - return current_thread_info()->preempt_count;
  9807. + return READ_ONCE(current_thread_info()->preempt_count);
  9808. }
  9809. -static __always_inline int *preempt_count_ptr(void)
  9810. +static __always_inline volatile int *preempt_count_ptr(void)
  9811. {
  9812. return &current_thread_info()->preempt_count;
  9813. }
  9814. diff -Nur linux-4.1.39.orig/include/linux/blkdev.h linux-4.1.39/include/linux/blkdev.h
  9815. --- linux-4.1.39.orig/include/linux/blkdev.h 2017-03-13 21:04:36.000000000 +0100
  9816. +++ linux-4.1.39/include/linux/blkdev.h 2017-04-18 17:56:30.601396665 +0200
  9817. @@ -101,6 +101,7 @@
  9818. struct list_head queuelist;
  9819. union {
  9820. struct call_single_data csd;
  9821. + struct work_struct work;
  9822. unsigned long fifo_time;
  9823. };
  9824. @@ -482,7 +483,7 @@
  9825. struct throtl_data *td;
  9826. #endif
  9827. struct rcu_head rcu_head;
  9828. - wait_queue_head_t mq_freeze_wq;
  9829. + struct swait_head mq_freeze_wq;
  9830. struct percpu_ref mq_usage_counter;
  9831. struct list_head all_q_node;
  9832. diff -Nur linux-4.1.39.orig/include/linux/blk-mq.h linux-4.1.39/include/linux/blk-mq.h
  9833. --- linux-4.1.39.orig/include/linux/blk-mq.h 2017-03-13 21:04:36.000000000 +0100
  9834. +++ linux-4.1.39/include/linux/blk-mq.h 2017-04-18 17:56:30.601396665 +0200
  9835. @@ -202,6 +202,7 @@
  9836. struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
  9837. struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
  9838. +void __blk_mq_complete_request_remote_work(struct work_struct *work);
  9839. int blk_mq_request_started(struct request *rq);
  9840. void blk_mq_start_request(struct request *rq);
  9841. diff -Nur linux-4.1.39.orig/include/linux/bottom_half.h linux-4.1.39/include/linux/bottom_half.h
  9842. --- linux-4.1.39.orig/include/linux/bottom_half.h 2017-03-13 21:04:36.000000000 +0100
  9843. +++ linux-4.1.39/include/linux/bottom_half.h 2017-04-18 17:56:30.601396665 +0200
  9844. @@ -4,6 +4,39 @@
  9845. #include <linux/preempt.h>
  9846. #include <linux/preempt_mask.h>
  9847. +#ifdef CONFIG_PREEMPT_RT_FULL
  9848. +
  9849. +extern void __local_bh_disable(void);
  9850. +extern void _local_bh_enable(void);
  9851. +extern void __local_bh_enable(void);
  9852. +
  9853. +static inline void local_bh_disable(void)
  9854. +{
  9855. + __local_bh_disable();
  9856. +}
  9857. +
  9858. +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
  9859. +{
  9860. + __local_bh_disable();
  9861. +}
  9862. +
  9863. +static inline void local_bh_enable(void)
  9864. +{
  9865. + __local_bh_enable();
  9866. +}
  9867. +
  9868. +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
  9869. +{
  9870. + __local_bh_enable();
  9871. +}
  9872. +
  9873. +static inline void local_bh_enable_ip(unsigned long ip)
  9874. +{
  9875. + __local_bh_enable();
  9876. +}
  9877. +
  9878. +#else
  9879. +
  9880. #ifdef CONFIG_TRACE_IRQFLAGS
  9881. extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
  9882. #else
  9883. @@ -31,5 +64,6 @@
  9884. {
  9885. __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
  9886. }
  9887. +#endif
  9888. #endif /* _LINUX_BH_H */
  9889. diff -Nur linux-4.1.39.orig/include/linux/buffer_head.h linux-4.1.39/include/linux/buffer_head.h
  9890. --- linux-4.1.39.orig/include/linux/buffer_head.h 2017-03-13 21:04:36.000000000 +0100
  9891. +++ linux-4.1.39/include/linux/buffer_head.h 2017-04-18 17:56:30.601396665 +0200
  9892. @@ -75,8 +75,52 @@
  9893. struct address_space *b_assoc_map; /* mapping this buffer is
  9894. associated with */
  9895. atomic_t b_count; /* users using this buffer_head */
  9896. +#ifdef CONFIG_PREEMPT_RT_BASE
  9897. + spinlock_t b_uptodate_lock;
  9898. +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
  9899. + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
  9900. + spinlock_t b_state_lock;
  9901. + spinlock_t b_journal_head_lock;
  9902. +#endif
  9903. +#endif
  9904. };
  9905. +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
  9906. +{
  9907. + unsigned long flags;
  9908. +
  9909. +#ifndef CONFIG_PREEMPT_RT_BASE
  9910. + local_irq_save(flags);
  9911. + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
  9912. +#else
  9913. + spin_lock_irqsave(&bh->b_uptodate_lock, flags);
  9914. +#endif
  9915. + return flags;
  9916. +}
  9917. +
  9918. +static inline void
  9919. +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
  9920. +{
  9921. +#ifndef CONFIG_PREEMPT_RT_BASE
  9922. + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
  9923. + local_irq_restore(flags);
  9924. +#else
  9925. + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
  9926. +#endif
  9927. +}
  9928. +
  9929. +static inline void buffer_head_init_locks(struct buffer_head *bh)
  9930. +{
  9931. +#ifdef CONFIG_PREEMPT_RT_BASE
  9932. + spin_lock_init(&bh->b_uptodate_lock);
  9933. +#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
  9934. + defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
  9935. + spin_lock_init(&bh->b_state_lock);
  9936. + spin_lock_init(&bh->b_journal_head_lock);
  9937. +#endif
  9938. +#endif
  9939. +}
  9940. +
  9941. /*
  9942. * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
  9943. * and buffer_foo() functions.
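The helpers above are what the fs/buffer.c and fs/ntfs/aops.c hunks earlier in this patch switch to. The caller-side pattern, condensed from those hunks (page and bh come from the surrounding end_buffer_async_* context):

	unsigned long flags;
	struct buffer_head *first = page_buffers(page);

	flags = bh_uptodate_lock_irqsave(first);	/* spinlock on RT, irq-off bit-spinlock otherwise */
	clear_buffer_async_read(bh);
	/* ... walk first->b_this_page under the lock ... */
	bh_uptodate_unlock_irqrestore(first, flags);
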
  9944. diff -Nur linux-4.1.39.orig/include/linux/cgroup-defs.h linux-4.1.39/include/linux/cgroup-defs.h
  9945. --- linux-4.1.39.orig/include/linux/cgroup-defs.h 2017-03-13 21:04:36.000000000 +0100
  9946. +++ linux-4.1.39/include/linux/cgroup-defs.h 2017-04-18 17:56:30.601396665 +0200
  9947. @@ -124,6 +124,7 @@
  9948. /* percpu_ref killing and RCU release */
  9949. struct rcu_head rcu_head;
  9950. struct work_struct destroy_work;
  9951. + struct swork_event destroy_swork;
  9952. };
  9953. /*
  9954. diff -Nur linux-4.1.39.orig/include/linux/cgroup.h linux-4.1.39/include/linux/cgroup.h
  9955. --- linux-4.1.39.orig/include/linux/cgroup.h 2017-03-13 21:04:36.000000000 +0100
  9956. +++ linux-4.1.39/include/linux/cgroup.h 2017-04-18 17:56:30.605396820 +0200
  9957. @@ -17,6 +17,8 @@
  9958. #include <linux/fs.h>
  9959. #include <linux/seq_file.h>
  9960. #include <linux/kernfs.h>
  9961. +#include <linux/wait.h>
  9962. +#include <linux/work-simple.h>
  9963. #include <linux/cgroup-defs.h>
  9964. diff -Nur linux-4.1.39.orig/include/linux/completion.h linux-4.1.39/include/linux/completion.h
  9965. --- linux-4.1.39.orig/include/linux/completion.h 2017-03-13 21:04:36.000000000 +0100
  9966. +++ linux-4.1.39/include/linux/completion.h 2017-04-18 17:56:30.605396820 +0200
  9967. @@ -7,8 +7,7 @@
  9968. * Atomic wait-for-completion handler data structures.
  9969. * See kernel/sched/completion.c for details.
  9970. */
  9971. -
  9972. -#include <linux/wait.h>
  9973. +#include <linux/wait-simple.h>
  9974. /*
  9975. * struct completion - structure used to maintain state for a "completion"
  9976. @@ -24,11 +23,11 @@
  9977. */
  9978. struct completion {
  9979. unsigned int done;
  9980. - wait_queue_head_t wait;
  9981. + struct swait_head wait;
  9982. };
  9983. #define COMPLETION_INITIALIZER(work) \
  9984. - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
  9985. + { 0, SWAIT_HEAD_INITIALIZER((work).wait) }
  9986. #define COMPLETION_INITIALIZER_ONSTACK(work) \
  9987. ({ init_completion(&work); work; })
  9988. @@ -73,7 +72,7 @@
  9989. static inline void init_completion(struct completion *x)
  9990. {
  9991. x->done = 0;
  9992. - init_waitqueue_head(&x->wait);
  9993. + init_swait_head(&x->wait);
  9994. }
  9995. /**
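The completion conversion above changes only the internals (a swait_head instead of a wait_queue_head_t); callers keep using the standard completion API. A minimal sketch:

	static DECLARE_COMPLETION(setup_done);

	static int waiter_thread(void *unused)
	{
		wait_for_completion(&setup_done);	/* sleeps on the simple wait queue on RT */
		return 0;
	}

	static void setup_finished(void)
	{
		complete(&setup_done);			/* wakes waiters via the swait head */
	}
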
  9996. diff -Nur linux-4.1.39.orig/include/linux/cpu.h linux-4.1.39/include/linux/cpu.h
  9997. --- linux-4.1.39.orig/include/linux/cpu.h 2017-03-13 21:04:36.000000000 +0100
  9998. +++ linux-4.1.39/include/linux/cpu.h 2017-04-18 17:56:30.605396820 +0200
  9999. @@ -231,6 +231,8 @@
  10000. extern void put_online_cpus(void);
  10001. extern void cpu_hotplug_disable(void);
  10002. extern void cpu_hotplug_enable(void);
  10003. +extern void pin_current_cpu(void);
  10004. +extern void unpin_current_cpu(void);
  10005. #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
  10006. #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
  10007. #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
  10008. @@ -249,6 +251,8 @@
  10009. #define put_online_cpus() do { } while (0)
  10010. #define cpu_hotplug_disable() do { } while (0)
  10011. #define cpu_hotplug_enable() do { } while (0)
  10012. +static inline void pin_current_cpu(void) { }
  10013. +static inline void unpin_current_cpu(void) { }
  10014. #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  10015. #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
  10016. /* These aren't inline functions due to a GCC bug. */
  10017. diff -Nur linux-4.1.39.orig/include/linux/delay.h linux-4.1.39/include/linux/delay.h
  10018. --- linux-4.1.39.orig/include/linux/delay.h 2017-03-13 21:04:36.000000000 +0100
  10019. +++ linux-4.1.39/include/linux/delay.h 2017-04-18 17:56:30.605396820 +0200
  10020. @@ -52,4 +52,10 @@
  10021. msleep(seconds * 1000);
  10022. }
  10023. +#ifdef CONFIG_PREEMPT_RT_FULL
  10024. +extern void cpu_chill(void);
  10025. +#else
  10026. +# define cpu_chill() cpu_relax()
  10027. +#endif
  10028. +
  10029. #endif /* defined(_LINUX_DELAY_H) */
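cpu_chill() is the primitive behind the trylock retry loops converted earlier in this patch (autofs4/expire.c, fs/dcache.c, fs/namespace.c): instead of busy-spinning with cpu_relax() while the lock holder may itself be preempted, the task backs off briefly on RT. The shape of such a loop, condensed from the autofs4 hunk:

	relock:
		spin_lock(&p->d_lock);
		parent = p->d_parent;
		if (!spin_trylock(&parent->d_lock)) {
			spin_unlock(&p->d_lock);
			cpu_chill();		/* plain cpu_relax() on !RT */
			goto relock;
		}
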
  10030. diff -Nur linux-4.1.39.orig/include/linux/ftrace_event.h linux-4.1.39/include/linux/ftrace_event.h
  10031. --- linux-4.1.39.orig/include/linux/ftrace_event.h 2017-03-13 21:04:36.000000000 +0100
  10032. +++ linux-4.1.39/include/linux/ftrace_event.h 2017-04-18 17:56:30.605396820 +0200
  10033. @@ -66,6 +66,9 @@
  10034. unsigned char flags;
  10035. unsigned char preempt_count;
  10036. int pid;
  10037. + unsigned short migrate_disable;
  10038. + unsigned short padding;
  10039. + unsigned char preempt_lazy_count;
  10040. };
  10041. #define FTRACE_MAX_EVENT \
  10042. diff -Nur linux-4.1.39.orig/include/linux/ftrace.h linux-4.1.39/include/linux/ftrace.h
  10043. --- linux-4.1.39.orig/include/linux/ftrace.h 2017-03-13 21:04:36.000000000 +0100
  10044. +++ linux-4.1.39/include/linux/ftrace.h 2017-04-18 17:56:30.605396820 +0200
  10045. @@ -682,6 +682,18 @@
  10046. #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
  10047. #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
  10048. +static inline unsigned long get_lock_parent_ip(void)
  10049. +{
  10050. + unsigned long addr = CALLER_ADDR0;
  10051. +
  10052. + if (!in_lock_functions(addr))
  10053. + return addr;
  10054. + addr = CALLER_ADDR1;
  10055. + if (!in_lock_functions(addr))
  10056. + return addr;
  10057. + return CALLER_ADDR2;
  10058. +}
  10059. +
  10060. #ifdef CONFIG_IRQSOFF_TRACER
  10061. extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
  10062. extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
  10063. diff -Nur linux-4.1.39.orig/include/linux/highmem.h linux-4.1.39/include/linux/highmem.h
  10064. --- linux-4.1.39.orig/include/linux/highmem.h 2017-03-13 21:04:36.000000000 +0100
  10065. +++ linux-4.1.39/include/linux/highmem.h 2017-04-18 17:56:30.605396820 +0200
  10066. @@ -7,6 +7,7 @@
  10067. #include <linux/mm.h>
  10068. #include <linux/uaccess.h>
  10069. #include <linux/hardirq.h>
  10070. +#include <linux/sched.h>
  10071. #include <asm/cacheflush.h>
  10072. @@ -65,6 +66,7 @@
  10073. static inline void *kmap_atomic(struct page *page)
  10074. {
  10075. + preempt_disable_nort();
  10076. pagefault_disable();
  10077. return page_address(page);
  10078. }
  10079. @@ -73,6 +75,7 @@
  10080. static inline void __kunmap_atomic(void *addr)
  10081. {
  10082. pagefault_enable();
  10083. + preempt_enable_nort();
  10084. }
  10085. #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
  10086. @@ -85,32 +88,51 @@
  10087. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  10088. +#ifndef CONFIG_PREEMPT_RT_FULL
  10089. DECLARE_PER_CPU(int, __kmap_atomic_idx);
  10090. +#endif
  10091. static inline int kmap_atomic_idx_push(void)
  10092. {
  10093. +#ifndef CONFIG_PREEMPT_RT_FULL
  10094. int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
  10095. -#ifdef CONFIG_DEBUG_HIGHMEM
  10096. +# ifdef CONFIG_DEBUG_HIGHMEM
  10097. WARN_ON_ONCE(in_irq() && !irqs_disabled());
  10098. BUG_ON(idx >= KM_TYPE_NR);
  10099. -#endif
  10100. +# endif
  10101. return idx;
  10102. +#else
  10103. + current->kmap_idx++;
  10104. + BUG_ON(current->kmap_idx > KM_TYPE_NR);
  10105. + return current->kmap_idx - 1;
  10106. +#endif
  10107. }
  10108. static inline int kmap_atomic_idx(void)
  10109. {
  10110. +#ifndef CONFIG_PREEMPT_RT_FULL
  10111. return __this_cpu_read(__kmap_atomic_idx) - 1;
  10112. +#else
  10113. + return current->kmap_idx - 1;
  10114. +#endif
  10115. }
  10116. static inline void kmap_atomic_idx_pop(void)
  10117. {
  10118. -#ifdef CONFIG_DEBUG_HIGHMEM
  10119. +#ifndef CONFIG_PREEMPT_RT_FULL
  10120. +# ifdef CONFIG_DEBUG_HIGHMEM
  10121. int idx = __this_cpu_dec_return(__kmap_atomic_idx);
  10122. BUG_ON(idx < 0);
  10123. -#else
  10124. +# else
  10125. __this_cpu_dec(__kmap_atomic_idx);
  10126. +# endif
  10127. +#else
  10128. + current->kmap_idx--;
  10129. +# ifdef CONFIG_DEBUG_HIGHMEM
  10130. + BUG_ON(current->kmap_idx < 0);
  10131. +# endif
  10132. #endif
  10133. }
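Callers of the atomic kmap API are unaffected by the changes above; only the slot bookkeeping moves from the per-CPU __kmap_atomic_idx counter to current->kmap_idx on RT, and the map/unmap paths gain the preempt_disable_nort()/preempt_enable_nort() pair. A typical (unchanged) caller, for reference:

	static void copy_into_page(struct page *page, const void *src, size_t len)
	{
		void *addr = kmap_atomic(page);	/* pagefault_disable() + preempt_disable_nort() */

		memcpy(addr, src, len);
		kunmap_atomic(addr);		/* pagefault_enable() + preempt_enable_nort() */
	}
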
  10134. diff -Nur linux-4.1.39.orig/include/linux/hrtimer.h linux-4.1.39/include/linux/hrtimer.h
  10135. --- linux-4.1.39.orig/include/linux/hrtimer.h 2017-03-13 21:04:36.000000000 +0100
  10136. +++ linux-4.1.39/include/linux/hrtimer.h 2017-04-18 17:56:30.605396820 +0200
  10137. @@ -111,6 +111,11 @@
  10138. enum hrtimer_restart (*function)(struct hrtimer *);
  10139. struct hrtimer_clock_base *base;
  10140. unsigned long state;
  10141. + struct list_head cb_entry;
  10142. + int irqsafe;
  10143. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  10144. + ktime_t praecox;
  10145. +#endif
  10146. #ifdef CONFIG_TIMER_STATS
  10147. int start_pid;
  10148. void *start_site;
  10149. @@ -147,6 +152,7 @@
  10150. int index;
  10151. clockid_t clockid;
  10152. struct timerqueue_head active;
  10153. + struct list_head expired;
  10154. ktime_t resolution;
  10155. ktime_t (*get_time)(void);
  10156. ktime_t softirq_time;
  10157. @@ -194,6 +200,9 @@
  10158. unsigned long nr_hangs;
  10159. ktime_t max_hang_time;
  10160. #endif
  10161. +#ifdef CONFIG_PREEMPT_RT_BASE
  10162. + wait_queue_head_t wait;
  10163. +#endif
  10164. struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
  10165. };
  10166. @@ -381,6 +390,13 @@
  10167. return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
  10168. }
  10169. +/* Softirq preemption could deadlock timer removal */
  10170. +#ifdef CONFIG_PREEMPT_RT_BASE
  10171. + extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
  10172. +#else
  10173. +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
  10174. +#endif
  10175. +
  10176. /* Query timers: */
  10177. extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
  10178. extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
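hrtimer_wait_for_timer(), declared above, backs the fs/timerfd.c change earlier in this patch: rather than spinning with cpu_relax() until a running timer callback finishes (which could deadlock against preempted softirq handling on RT), the caller drops its lock and waits. Restating that hunk's pattern:

	spin_unlock_irq(&ctx->wqh.lock);
	if (isalarm(ctx))
		hrtimer_wait_for_timer(&ctx->t.alarm.timer);
	else
		hrtimer_wait_for_timer(&ctx->t.tmr);
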
  10179. diff -Nur linux-4.1.39.orig/include/linux/idr.h linux-4.1.39/include/linux/idr.h
  10180. --- linux-4.1.39.orig/include/linux/idr.h 2017-03-13 21:04:36.000000000 +0100
  10181. +++ linux-4.1.39/include/linux/idr.h 2017-04-18 17:56:30.605396820 +0200
  10182. @@ -95,10 +95,14 @@
  10183. * Each idr_preload() should be matched with an invocation of this
  10184. * function. See idr_preload() for details.
  10185. */
  10186. +#ifdef CONFIG_PREEMPT_RT_FULL
  10187. +void idr_preload_end(void);
  10188. +#else
  10189. static inline void idr_preload_end(void)
  10190. {
  10191. preempt_enable();
  10192. }
  10193. +#endif
  10194. /**
  10195. * idr_find - return pointer for given id
  10196. diff -Nur linux-4.1.39.orig/include/linux/init_task.h linux-4.1.39/include/linux/init_task.h
  10197. --- linux-4.1.39.orig/include/linux/init_task.h 2017-03-13 21:04:36.000000000 +0100
  10198. +++ linux-4.1.39/include/linux/init_task.h 2017-04-18 17:56:30.605396820 +0200
  10199. @@ -147,9 +147,16 @@
  10200. # define INIT_PERF_EVENTS(tsk)
  10201. #endif
  10202. +#ifdef CONFIG_PREEMPT_RT_BASE
  10203. +# define INIT_TIMER_LIST .posix_timer_list = NULL,
  10204. +#else
  10205. +# define INIT_TIMER_LIST
  10206. +#endif
  10207. +
  10208. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  10209. # define INIT_VTIME(tsk) \
  10210. - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
  10211. + .vtime_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.vtime_lock), \
  10212. + .vtime_seq = SEQCNT_ZERO(tsk.vtime_seq), \
  10213. .vtime_snap = 0, \
  10214. .vtime_snap_whence = VTIME_SYS,
  10215. #else
  10216. @@ -238,6 +245,7 @@
  10217. .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
  10218. .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
  10219. .timer_slack_ns = 50000, /* 50 usec default slack */ \
  10220. + INIT_TIMER_LIST \
  10221. .pids = { \
  10222. [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
  10223. [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
  10224. diff -Nur linux-4.1.39.orig/include/linux/interrupt.h linux-4.1.39/include/linux/interrupt.h
  10225. --- linux-4.1.39.orig/include/linux/interrupt.h 2017-03-13 21:04:36.000000000 +0100
  10226. +++ linux-4.1.39/include/linux/interrupt.h 2017-04-18 17:56:30.605396820 +0200
  10227. @@ -61,6 +61,7 @@
  10228. * interrupt handler after suspending interrupts. For system
  10229. * wakeup devices users need to implement wakeup detection in
  10230. * their interrupt handlers.
  10231. + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT)
  10232. */
  10233. #define IRQF_SHARED 0x00000080
  10234. #define IRQF_PROBE_SHARED 0x00000100
  10235. @@ -74,6 +75,7 @@
  10236. #define IRQF_NO_THREAD 0x00010000
  10237. #define IRQF_EARLY_RESUME 0x00020000
  10238. #define IRQF_COND_SUSPEND 0x00040000
  10239. +#define IRQF_NO_SOFTIRQ_CALL 0x00080000
  10240. #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
  10241. @@ -102,6 +104,7 @@
  10242. * @flags: flags (see IRQF_* above)
  10243. * @thread_fn: interrupt handler function for threaded interrupts
  10244. * @thread: thread pointer for threaded interrupts
  10245. + * @secondary: pointer to secondary irqaction (force threading)
  10246. * @thread_flags: flags related to @thread
  10247. * @thread_mask: bitmask for keeping track of @thread activity
  10248. * @dir: pointer to the proc/irq/NN/name entry
  10249. @@ -113,6 +116,7 @@
  10250. struct irqaction *next;
  10251. irq_handler_t thread_fn;
  10252. struct task_struct *thread;
  10253. + struct irqaction *secondary;
  10254. unsigned int irq;
  10255. unsigned int flags;
  10256. unsigned long thread_flags;
  10257. @@ -184,7 +188,7 @@
  10258. #ifdef CONFIG_LOCKDEP
  10259. # define local_irq_enable_in_hardirq() do { } while (0)
  10260. #else
  10261. -# define local_irq_enable_in_hardirq() local_irq_enable()
  10262. +# define local_irq_enable_in_hardirq() local_irq_enable_nort()
  10263. #endif
  10264. extern void disable_irq_nosync(unsigned int irq);
  10265. @@ -215,6 +219,7 @@
  10266. unsigned int irq;
  10267. struct kref kref;
  10268. struct work_struct work;
  10269. + struct list_head list;
  10270. void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
  10271. void (*release)(struct kref *ref);
  10272. };
  10273. @@ -377,9 +382,13 @@
  10274. bool state);
  10275. #ifdef CONFIG_IRQ_FORCED_THREADING
  10276. +# ifndef CONFIG_PREEMPT_RT_BASE
  10277. extern bool force_irqthreads;
  10278. +# else
  10279. +# define force_irqthreads (true)
  10280. +# endif
  10281. #else
  10282. -#define force_irqthreads (0)
  10283. +#define force_irqthreads (false)
  10284. #endif
  10285. #ifndef __ARCH_SET_SOFTIRQ_PENDING
  10286. @@ -435,9 +444,10 @@
  10287. void (*action)(struct softirq_action *);
  10288. };
  10289. +#ifndef CONFIG_PREEMPT_RT_FULL
  10290. asmlinkage void do_softirq(void);
  10291. asmlinkage void __do_softirq(void);
  10292. -
  10293. +static inline void thread_do_softirq(void) { do_softirq(); }
  10294. #ifdef __ARCH_HAS_DO_SOFTIRQ
  10295. void do_softirq_own_stack(void);
  10296. #else
  10297. @@ -446,13 +456,25 @@
  10298. __do_softirq();
  10299. }
  10300. #endif
  10301. +#else
  10302. +extern void thread_do_softirq(void);
  10303. +#endif
  10304. extern void open_softirq(int nr, void (*action)(struct softirq_action *));
  10305. extern void softirq_init(void);
  10306. extern void __raise_softirq_irqoff(unsigned int nr);
  10307. +#ifdef CONFIG_PREEMPT_RT_FULL
  10308. +extern void __raise_softirq_irqoff_ksoft(unsigned int nr);
  10309. +#else
  10310. +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr)
  10311. +{
  10312. + __raise_softirq_irqoff(nr);
  10313. +}
  10314. +#endif
  10315. extern void raise_softirq_irqoff(unsigned int nr);
  10316. extern void raise_softirq(unsigned int nr);
  10317. +extern void softirq_check_pending_idle(void);
  10318. DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  10319. @@ -474,8 +496,9 @@
  10320. to be executed on some cpu at least once after this.
  10321. * If the tasklet is already scheduled, but its execution is still not
  10322. started, it will be executed only once.
  10323. - * If this tasklet is already running on another CPU (or schedule is called
  10324. - from tasklet itself), it is rescheduled for later.
  10325. + * If this tasklet is already running on another CPU, it is rescheduled
  10326. + for later.
  10327. + * Schedule must not be called from the tasklet itself (a lockup occurs)
  10328. * Tasklet is strictly serialized wrt itself, but not
  10329. wrt another tasklets. If client needs some intertask synchronization,
  10330. he makes it with spinlocks.
  10331. @@ -500,27 +523,36 @@
  10332. enum
  10333. {
  10334. TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
  10335. - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
  10336. + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */
  10337. + TASKLET_STATE_PENDING /* Tasklet is pending */
  10338. };
  10339. -#ifdef CONFIG_SMP
  10340. +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED)
  10341. +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN)
  10342. +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING)
  10343. +
  10344. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  10345. static inline int tasklet_trylock(struct tasklet_struct *t)
  10346. {
  10347. return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
  10348. }
  10349. +static inline int tasklet_tryunlock(struct tasklet_struct *t)
  10350. +{
  10351. + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN;
  10352. +}
  10353. +
  10354. static inline void tasklet_unlock(struct tasklet_struct *t)
  10355. {
  10356. smp_mb__before_atomic();
  10357. clear_bit(TASKLET_STATE_RUN, &(t)->state);
  10358. }
  10359. -static inline void tasklet_unlock_wait(struct tasklet_struct *t)
  10360. -{
  10361. - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
  10362. -}
  10363. +extern void tasklet_unlock_wait(struct tasklet_struct *t);
  10364. +
  10365. #else
  10366. #define tasklet_trylock(t) 1
  10367. +#define tasklet_tryunlock(t) 1
  10368. #define tasklet_unlock_wait(t) do { } while (0)
  10369. #define tasklet_unlock(t) do { } while (0)
  10370. #endif
  10371. @@ -569,12 +601,7 @@
  10372. smp_mb();
  10373. }
  10374. -static inline void tasklet_enable(struct tasklet_struct *t)
  10375. -{
  10376. - smp_mb__before_atomic();
  10377. - atomic_dec(&t->count);
  10378. -}
  10379. -
  10380. +extern void tasklet_enable(struct tasklet_struct *t);
  10381. extern void tasklet_kill(struct tasklet_struct *t);
  10382. extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
  10383. extern void tasklet_init(struct tasklet_struct *t,
  10384. @@ -605,6 +632,12 @@
  10385. tasklet_kill(&ttimer->tasklet);
  10386. }
  10387. +#ifdef CONFIG_PREEMPT_RT_FULL
  10388. +extern void softirq_early_init(void);
  10389. +#else
  10390. +static inline void softirq_early_init(void) { }
  10391. +#endif
  10392. +
  10393. /*
  10394. * Autoprobing for irqs:
  10395. *
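
[Note, not part of the patch: the interrupt.h hunks above tighten the documented tasklet rules (a tasklet must not reschedule itself) and add the PENDING state plus tasklet_tryunlock() for the RT tasklet machinery. Driver-side usage keeps the usual declare/schedule/kill pattern; a minimal sketch with hypothetical example_* names:]

    #include <linux/interrupt.h>

    static void example_tasklet_fn(unsigned long data)
    {
            /* deferred work; must not call tasklet_schedule() on itself */
    }

    static DECLARE_TASKLET(example_tasklet, example_tasklet_fn, 0);

    static irqreturn_t example_irq(int irq, void *dev_id)
    {
            tasklet_schedule(&example_tasklet);
            return IRQ_HANDLED;
    }

    /* on teardown: tasklet_kill(&example_tasklet); */
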
  10396. diff -Nur linux-4.1.39.orig/include/linux/io-mapping.h linux-4.1.39/include/linux/io-mapping.h
  10397. --- linux-4.1.39.orig/include/linux/io-mapping.h 2017-03-13 21:04:36.000000000 +0100
  10398. +++ linux-4.1.39/include/linux/io-mapping.h 2017-04-18 17:56:30.605396820 +0200
  10399. @@ -141,6 +141,7 @@
  10400. io_mapping_map_atomic_wc(struct io_mapping *mapping,
  10401. unsigned long offset)
  10402. {
  10403. + preempt_disable();
  10404. pagefault_disable();
  10405. return ((char __force __iomem *) mapping) + offset;
  10406. }
  10407. @@ -149,6 +150,7 @@
  10408. io_mapping_unmap_atomic(void __iomem *vaddr)
  10409. {
  10410. pagefault_enable();
  10411. + preempt_enable();
  10412. }
  10413. /* Non-atomic map/unmap */
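
[Note, not part of the patch: adding preempt_disable() makes the atomic io-mapping window explicitly non-preemptible on both configurations, so nothing between map and unmap may sleep. A minimal usage sketch, with hypothetical example_* names:]

    #include <linux/io-mapping.h>
    #include <linux/io.h>

    static void example_write_reg(struct io_mapping *mapping,
                                  unsigned long offset, u32 val)
    {
            void __iomem *p;

            p = io_mapping_map_atomic_wc(mapping, offset);
            writel(val, p);                 /* no sleeping in this window */
            io_mapping_unmap_atomic(p);
    }
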
  10414. diff -Nur linux-4.1.39.orig/include/linux/irqdesc.h linux-4.1.39/include/linux/irqdesc.h
  10415. --- linux-4.1.39.orig/include/linux/irqdesc.h 2017-03-13 21:04:36.000000000 +0100
  10416. +++ linux-4.1.39/include/linux/irqdesc.h 2017-04-18 17:56:30.605396820 +0200
  10417. @@ -63,6 +63,7 @@
  10418. unsigned int irqs_unhandled;
  10419. atomic_t threads_handled;
  10420. int threads_handled_last;
  10421. + u64 random_ip;
  10422. raw_spinlock_t lock;
  10423. struct cpumask *percpu_enabled;
  10424. #ifdef CONFIG_SMP
  10425. diff -Nur linux-4.1.39.orig/include/linux/irqflags.h linux-4.1.39/include/linux/irqflags.h
  10426. --- linux-4.1.39.orig/include/linux/irqflags.h 2017-03-13 21:04:36.000000000 +0100
  10427. +++ linux-4.1.39/include/linux/irqflags.h 2017-04-18 17:56:30.605396820 +0200
  10428. @@ -25,8 +25,6 @@
  10429. # define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
  10430. # define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
  10431. # define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
  10432. -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  10433. -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  10434. # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
  10435. #else
  10436. # define trace_hardirqs_on() do { } while (0)
  10437. @@ -39,9 +37,15 @@
  10438. # define trace_softirqs_enabled(p) 0
  10439. # define trace_hardirq_enter() do { } while (0)
  10440. # define trace_hardirq_exit() do { } while (0)
  10441. +# define INIT_TRACE_IRQFLAGS
  10442. +#endif
  10443. +
  10444. +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
  10445. +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
  10446. +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
  10447. +#else
  10448. # define lockdep_softirq_enter() do { } while (0)
  10449. # define lockdep_softirq_exit() do { } while (0)
  10450. -# define INIT_TRACE_IRQFLAGS
  10451. #endif
  10452. #if defined(CONFIG_IRQSOFF_TRACER) || \
  10453. @@ -148,4 +152,23 @@
  10454. #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
  10455. +/*
  10456. + * local_irq* variants depending on RT/!RT
  10457. + */
  10458. +#ifdef CONFIG_PREEMPT_RT_FULL
  10459. +# define local_irq_disable_nort() do { } while (0)
  10460. +# define local_irq_enable_nort() do { } while (0)
  10461. +# define local_irq_save_nort(flags) local_save_flags(flags)
  10462. +# define local_irq_restore_nort(flags) (void)(flags)
  10463. +# define local_irq_disable_rt() local_irq_disable()
  10464. +# define local_irq_enable_rt() local_irq_enable()
  10465. +#else
  10466. +# define local_irq_disable_nort() local_irq_disable()
  10467. +# define local_irq_enable_nort() local_irq_enable()
  10468. +# define local_irq_save_nort(flags) local_irq_save(flags)
  10469. +# define local_irq_restore_nort(flags) local_irq_restore(flags)
  10470. +# define local_irq_disable_rt() do { } while (0)
  10471. +# define local_irq_enable_rt() do { } while (0)
  10472. +#endif
  10473. +
  10474. #endif
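
[Note, not part of the patch: the *_nort helpers keep hard interrupts disabled on !RT but become (almost) no-ops on RT, where the section is expected to be serialized by sleeping locks instead. A minimal sketch, hypothetical example name:]

    static void example_short_section(void)
    {
            unsigned long flags;

            local_irq_save_nort(flags);
            /* IRQs off on !RT; preemptible, flags-only on PREEMPT_RT_FULL */
            local_irq_restore_nort(flags);
    }
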
  10475. diff -Nur linux-4.1.39.orig/include/linux/irq.h linux-4.1.39/include/linux/irq.h
  10476. --- linux-4.1.39.orig/include/linux/irq.h 2017-03-13 21:04:36.000000000 +0100
  10477. +++ linux-4.1.39/include/linux/irq.h 2017-04-18 17:56:30.605396820 +0200
  10478. @@ -72,6 +72,7 @@
  10479. * IRQ_IS_POLLED - Always polled by another interrupt. Exclude
  10480. * it from the spurious interrupt detection
  10481. * mechanism and from core side polling.
  10482. + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT)
  10483. */
  10484. enum {
  10485. IRQ_TYPE_NONE = 0x00000000,
  10486. @@ -97,13 +98,14 @@
  10487. IRQ_NOTHREAD = (1 << 16),
  10488. IRQ_PER_CPU_DEVID = (1 << 17),
  10489. IRQ_IS_POLLED = (1 << 18),
  10490. + IRQ_NO_SOFTIRQ_CALL = (1 << 19),
  10491. };
  10492. #define IRQF_MODIFY_MASK \
  10493. (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
  10494. IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
  10495. IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \
  10496. - IRQ_IS_POLLED)
  10497. + IRQ_IS_POLLED | IRQ_NO_SOFTIRQ_CALL)
  10498. #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
  10499. diff -Nur linux-4.1.39.orig/include/linux/irq_work.h linux-4.1.39/include/linux/irq_work.h
  10500. --- linux-4.1.39.orig/include/linux/irq_work.h 2017-03-13 21:04:36.000000000 +0100
  10501. +++ linux-4.1.39/include/linux/irq_work.h 2017-04-18 17:56:30.605396820 +0200
  10502. @@ -16,6 +16,7 @@
  10503. #define IRQ_WORK_BUSY 2UL
  10504. #define IRQ_WORK_FLAGS 3UL
  10505. #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
  10506. +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */
  10507. struct irq_work {
  10508. unsigned long flags;
  10509. @@ -51,4 +52,10 @@
  10510. static inline void irq_work_run(void) { }
  10511. #endif
  10512. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  10513. +void irq_work_tick_soft(void);
  10514. +#else
  10515. +static inline void irq_work_tick_soft(void) { }
  10516. +#endif
  10517. +
  10518. #endif /* _LINUX_IRQ_WORK_H */
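
[Note, not part of the patch: IRQ_WORK_HARD_IRQ marks work that should still run from the hard interrupt/IPI path on RT instead of being deferred to the softirq thread. A sketch under that assumption, with hypothetical example_* names and static initialization of the flag:]

    #include <linux/irq_work.h>

    static void example_irq_work_fn(struct irq_work *work)
    {
            /* runs in hard interrupt context, even with PREEMPT_RT_FULL */
    }

    static struct irq_work example_work = {
            .flags = IRQ_WORK_HARD_IRQ,
            .func  = example_irq_work_fn,
    };

    /* from any context: irq_work_queue(&example_work); */
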
  10519. diff -Nur linux-4.1.39.orig/include/linux/jbd_common.h linux-4.1.39/include/linux/jbd_common.h
  10520. --- linux-4.1.39.orig/include/linux/jbd_common.h 2017-03-13 21:04:36.000000000 +0100
  10521. +++ linux-4.1.39/include/linux/jbd_common.h 2017-04-18 17:56:30.605396820 +0200
  10522. @@ -15,32 +15,56 @@
  10523. static inline void jbd_lock_bh_state(struct buffer_head *bh)
  10524. {
  10525. +#ifndef CONFIG_PREEMPT_RT_BASE
  10526. bit_spin_lock(BH_State, &bh->b_state);
  10527. +#else
  10528. + spin_lock(&bh->b_state_lock);
  10529. +#endif
  10530. }
  10531. static inline int jbd_trylock_bh_state(struct buffer_head *bh)
  10532. {
  10533. +#ifndef CONFIG_PREEMPT_RT_BASE
  10534. return bit_spin_trylock(BH_State, &bh->b_state);
  10535. +#else
  10536. + return spin_trylock(&bh->b_state_lock);
  10537. +#endif
  10538. }
  10539. static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
  10540. {
  10541. +#ifndef CONFIG_PREEMPT_RT_BASE
  10542. return bit_spin_is_locked(BH_State, &bh->b_state);
  10543. +#else
  10544. + return spin_is_locked(&bh->b_state_lock);
  10545. +#endif
  10546. }
  10547. static inline void jbd_unlock_bh_state(struct buffer_head *bh)
  10548. {
  10549. +#ifndef CONFIG_PREEMPT_RT_BASE
  10550. bit_spin_unlock(BH_State, &bh->b_state);
  10551. +#else
  10552. + spin_unlock(&bh->b_state_lock);
  10553. +#endif
  10554. }
  10555. static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
  10556. {
  10557. +#ifndef CONFIG_PREEMPT_RT_BASE
  10558. bit_spin_lock(BH_JournalHead, &bh->b_state);
  10559. +#else
  10560. + spin_lock(&bh->b_journal_head_lock);
  10561. +#endif
  10562. }
  10563. static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
  10564. {
  10565. +#ifndef CONFIG_PREEMPT_RT_BASE
  10566. bit_spin_unlock(BH_JournalHead, &bh->b_state);
  10567. +#else
  10568. + spin_unlock(&bh->b_journal_head_lock);
  10569. +#endif
  10570. }
  10571. #endif
  10572. diff -Nur linux-4.1.39.orig/include/linux/kdb.h linux-4.1.39/include/linux/kdb.h
  10573. --- linux-4.1.39.orig/include/linux/kdb.h 2017-03-13 21:04:36.000000000 +0100
  10574. +++ linux-4.1.39/include/linux/kdb.h 2017-04-18 17:56:30.605396820 +0200
  10575. @@ -167,6 +167,7 @@
  10576. extern __printf(1, 2) int kdb_printf(const char *, ...);
  10577. typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
  10578. +#define in_kdb_printk() (kdb_trap_printk)
  10579. extern void kdb_init(int level);
  10580. /* Access to kdb specific polling devices */
  10581. @@ -201,6 +202,7 @@
  10582. extern int kdb_unregister(char *);
  10583. #else /* ! CONFIG_KGDB_KDB */
  10584. static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
  10585. +#define in_kdb_printk() (0)
  10586. static inline void kdb_init(int level) {}
  10587. static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
  10588. char *help, short minlen) { return 0; }
  10589. diff -Nur linux-4.1.39.orig/include/linux/kernel.h linux-4.1.39/include/linux/kernel.h
  10590. --- linux-4.1.39.orig/include/linux/kernel.h 2017-03-13 21:04:36.000000000 +0100
  10591. +++ linux-4.1.39/include/linux/kernel.h 2017-04-18 17:56:30.605396820 +0200
  10592. @@ -188,6 +188,9 @@
  10593. */
  10594. # define might_sleep() \
  10595. do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  10596. +
  10597. +# define might_sleep_no_state_check() \
  10598. + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
  10599. # define sched_annotate_sleep() (current->task_state_change = 0)
  10600. #else
  10601. static inline void ___might_sleep(const char *file, int line,
  10602. @@ -195,6 +198,7 @@
  10603. static inline void __might_sleep(const char *file, int line,
  10604. int preempt_offset) { }
  10605. # define might_sleep() do { might_resched(); } while (0)
  10606. +# define might_sleep_no_state_check() do { might_resched(); } while (0)
  10607. # define sched_annotate_sleep() do { } while (0)
  10608. #endif
  10609. @@ -244,7 +248,8 @@
  10610. #if defined(CONFIG_MMU) && \
  10611. (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
  10612. -void might_fault(void);
  10613. +#define might_fault() __might_fault(__FILE__, __LINE__)
  10614. +void __might_fault(const char *file, int line);
  10615. #else
  10616. static inline void might_fault(void) { }
  10617. #endif
  10618. @@ -466,6 +471,7 @@
  10619. SYSTEM_HALT,
  10620. SYSTEM_POWER_OFF,
  10621. SYSTEM_RESTART,
  10622. + SYSTEM_SUSPEND,
  10623. } system_state;
  10624. #define TAINT_PROPRIETARY_MODULE 0
  10625. diff -Nur linux-4.1.39.orig/include/linux/kvm_host.h linux-4.1.39/include/linux/kvm_host.h
  10626. --- linux-4.1.39.orig/include/linux/kvm_host.h 2017-03-13 21:04:36.000000000 +0100
  10627. +++ linux-4.1.39/include/linux/kvm_host.h 2017-04-18 17:56:30.605396820 +0200
  10628. @@ -230,7 +230,7 @@
  10629. int fpu_active;
  10630. int guest_fpu_loaded, guest_xcr0_loaded;
  10631. - wait_queue_head_t wq;
  10632. + struct swait_head wq;
  10633. struct pid *pid;
  10634. int sigset_active;
  10635. sigset_t sigset;
  10636. @@ -701,7 +701,7 @@
  10637. }
  10638. #endif
  10639. -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
  10640. +static inline struct swait_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
  10641. {
  10642. #ifdef __KVM_HAVE_ARCH_WQP
  10643. return vcpu->arch.wqp;
  10644. diff -Nur linux-4.1.39.orig/include/linux/lglock.h linux-4.1.39/include/linux/lglock.h
  10645. --- linux-4.1.39.orig/include/linux/lglock.h 2017-03-13 21:04:36.000000000 +0100
  10646. +++ linux-4.1.39/include/linux/lglock.h 2017-04-18 17:56:30.605396820 +0200
  10647. @@ -34,22 +34,39 @@
  10648. #endif
  10649. struct lglock {
  10650. +#ifndef CONFIG_PREEMPT_RT_FULL
  10651. arch_spinlock_t __percpu *lock;
  10652. +#else
  10653. + struct rt_mutex __percpu *lock;
  10654. +#endif
  10655. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  10656. struct lock_class_key lock_key;
  10657. struct lockdep_map lock_dep_map;
  10658. #endif
  10659. };
  10660. -#define DEFINE_LGLOCK(name) \
  10661. +#ifndef CONFIG_PREEMPT_RT_FULL
  10662. +# define DEFINE_LGLOCK(name) \
  10663. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  10664. = __ARCH_SPIN_LOCK_UNLOCKED; \
  10665. struct lglock name = { .lock = &name ## _lock }
  10666. -#define DEFINE_STATIC_LGLOCK(name) \
  10667. +# define DEFINE_STATIC_LGLOCK(name) \
  10668. static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \
  10669. = __ARCH_SPIN_LOCK_UNLOCKED; \
  10670. static struct lglock name = { .lock = &name ## _lock }
  10671. +#else
  10672. +
  10673. +# define DEFINE_LGLOCK(name) \
  10674. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  10675. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  10676. + struct lglock name = { .lock = &name ## _lock }
  10677. +
  10678. +# define DEFINE_STATIC_LGLOCK(name) \
  10679. + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \
  10680. + = __RT_MUTEX_INITIALIZER( name ## _lock); \
  10681. + static struct lglock name = { .lock = &name ## _lock }
  10682. +#endif
  10683. void lg_lock_init(struct lglock *lg, char *name);
  10684. void lg_local_lock(struct lglock *lg);
  10685. @@ -59,6 +76,12 @@
  10686. void lg_global_lock(struct lglock *lg);
  10687. void lg_global_unlock(struct lglock *lg);
  10688. +#ifndef CONFIG_PREEMPT_RT_FULL
  10689. +#define lg_global_trylock_relax(name) lg_global_lock(name)
  10690. +#else
  10691. +void lg_global_trylock_relax(struct lglock *lg);
  10692. +#endif
  10693. +
  10694. #else
  10695. /* When !CONFIG_SMP, map lglock to spinlock */
  10696. #define lglock spinlock
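
[Note, not part of the patch: on RT the per-CPU arch spinlocks behind an lglock become rt_mutexes, but the lg_* call sites are unchanged. A minimal sketch with a hypothetical name; lg_local_unlock() is part of the existing lglock API even though this hunk does not show it:]

    DEFINE_LGLOCK(example_lglock);

    static void example(void)
    {
            /* once, at init time: lg_lock_init(&example_lglock, "example_lglock"); */

            lg_local_lock(&example_lglock);
            /* this CPU's side of the lock is held */
            lg_local_unlock(&example_lglock);
    }
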
  10697. diff -Nur linux-4.1.39.orig/include/linux/list_bl.h linux-4.1.39/include/linux/list_bl.h
  10698. --- linux-4.1.39.orig/include/linux/list_bl.h 2017-03-13 21:04:36.000000000 +0100
  10699. +++ linux-4.1.39/include/linux/list_bl.h 2017-04-18 17:56:30.605396820 +0200
  10700. @@ -2,6 +2,7 @@
  10701. #define _LINUX_LIST_BL_H
  10702. #include <linux/list.h>
  10703. +#include <linux/spinlock.h>
  10704. #include <linux/bit_spinlock.h>
  10705. /*
  10706. @@ -32,13 +33,24 @@
  10707. struct hlist_bl_head {
  10708. struct hlist_bl_node *first;
  10709. +#ifdef CONFIG_PREEMPT_RT_BASE
  10710. + raw_spinlock_t lock;
  10711. +#endif
  10712. };
  10713. struct hlist_bl_node {
  10714. struct hlist_bl_node *next, **pprev;
  10715. };
  10716. -#define INIT_HLIST_BL_HEAD(ptr) \
  10717. - ((ptr)->first = NULL)
  10718. +
  10719. +#ifdef CONFIG_PREEMPT_RT_BASE
  10720. +#define INIT_HLIST_BL_HEAD(h) \
  10721. +do { \
  10722. + (h)->first = NULL; \
  10723. + raw_spin_lock_init(&(h)->lock); \
  10724. +} while (0)
  10725. +#else
  10726. +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
  10727. +#endif
  10728. static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
  10729. {
  10730. @@ -117,12 +129,26 @@
  10731. static inline void hlist_bl_lock(struct hlist_bl_head *b)
  10732. {
  10733. +#ifndef CONFIG_PREEMPT_RT_BASE
  10734. bit_spin_lock(0, (unsigned long *)b);
  10735. +#else
  10736. + raw_spin_lock(&b->lock);
  10737. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  10738. + __set_bit(0, (unsigned long *)b);
  10739. +#endif
  10740. +#endif
  10741. }
  10742. static inline void hlist_bl_unlock(struct hlist_bl_head *b)
  10743. {
  10744. +#ifndef CONFIG_PREEMPT_RT_BASE
  10745. __bit_spin_unlock(0, (unsigned long *)b);
  10746. +#else
  10747. +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  10748. + __clear_bit(0, (unsigned long *)b);
  10749. +#endif
  10750. + raw_spin_unlock(&b->lock);
  10751. +#endif
  10752. }
  10753. static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
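
[Note, not part of the patch: on RT the bit spinlock in bit 0 of ->first is replaced by a real raw spinlock embedded in the head, while callers keep using hlist_bl_lock()/hlist_bl_unlock(). A sketch with hypothetical example_* names; hlist_bl_add_head() is the existing list_bl API:]

    static struct hlist_bl_head example_head;

    static void example_init(void)
    {
            INIT_HLIST_BL_HEAD(&example_head);
    }

    static void example_insert(struct hlist_bl_node *n)
    {
            hlist_bl_lock(&example_head);
            hlist_bl_add_head(n, &example_head);
            hlist_bl_unlock(&example_head);
    }
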
  10754. diff -Nur linux-4.1.39.orig/include/linux/locallock.h linux-4.1.39/include/linux/locallock.h
  10755. --- linux-4.1.39.orig/include/linux/locallock.h 1970-01-01 01:00:00.000000000 +0100
  10756. +++ linux-4.1.39/include/linux/locallock.h 2017-04-18 17:56:30.605396820 +0200
  10757. @@ -0,0 +1,276 @@
  10758. +#ifndef _LINUX_LOCALLOCK_H
  10759. +#define _LINUX_LOCALLOCK_H
  10760. +
  10761. +#include <linux/percpu.h>
  10762. +#include <linux/spinlock.h>
  10763. +
  10764. +#ifdef CONFIG_PREEMPT_RT_BASE
  10765. +
  10766. +#ifdef CONFIG_DEBUG_SPINLOCK
  10767. +# define LL_WARN(cond) WARN_ON(cond)
  10768. +#else
  10769. +# define LL_WARN(cond) do { } while (0)
  10770. +#endif
  10771. +
  10772. +/*
  10773. + * per cpu lock based substitute for local_irq_*()
  10774. + */
  10775. +struct local_irq_lock {
  10776. + spinlock_t lock;
  10777. + struct task_struct *owner;
  10778. + int nestcnt;
  10779. + unsigned long flags;
  10780. +};
  10781. +
  10782. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \
  10783. + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \
  10784. + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) }
  10785. +
  10786. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \
  10787. + DECLARE_PER_CPU(struct local_irq_lock, lvar)
  10788. +
  10789. +#define local_irq_lock_init(lvar) \
  10790. + do { \
  10791. + int __cpu; \
  10792. + for_each_possible_cpu(__cpu) \
  10793. + spin_lock_init(&per_cpu(lvar, __cpu).lock); \
  10794. + } while (0)
  10795. +
  10796. +/*
  10797. + * spin_lock|trylock|unlock_local flavour that does not migrate disable
  10798. + * used for __local_lock|trylock|unlock where get_local_var/put_local_var
  10799. + * already takes care of the migrate_disable/enable
  10800. + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls.
  10801. + */
  10802. +#ifdef CONFIG_PREEMPT_RT_FULL
  10803. +# define spin_lock_local(lock) rt_spin_lock(lock)
  10804. +# define spin_trylock_local(lock) rt_spin_trylock(lock)
  10805. +# define spin_unlock_local(lock) rt_spin_unlock(lock)
  10806. +#else
  10807. +# define spin_lock_local(lock) spin_lock(lock)
  10808. +# define spin_trylock_local(lock) spin_trylock(lock)
  10809. +# define spin_unlock_local(lock) spin_unlock(lock)
  10810. +#endif
  10811. +
  10812. +static inline void __local_lock(struct local_irq_lock *lv)
  10813. +{
  10814. + if (lv->owner != current) {
  10815. + spin_lock_local(&lv->lock);
  10816. + LL_WARN(lv->owner);
  10817. + LL_WARN(lv->nestcnt);
  10818. + lv->owner = current;
  10819. + }
  10820. + lv->nestcnt++;
  10821. +}
  10822. +
  10823. +#define local_lock(lvar) \
  10824. + do { __local_lock(&get_local_var(lvar)); } while (0)
  10825. +
  10826. +#define local_lock_on(lvar, cpu) \
  10827. + do { __local_lock(&per_cpu(lvar, cpu)); } while (0)
  10828. +
  10829. +static inline int __local_trylock(struct local_irq_lock *lv)
  10830. +{
  10831. + if (lv->owner != current && spin_trylock_local(&lv->lock)) {
  10832. + LL_WARN(lv->owner);
  10833. + LL_WARN(lv->nestcnt);
  10834. + lv->owner = current;
  10835. + lv->nestcnt = 1;
  10836. + return 1;
  10837. + }
  10838. + return 0;
  10839. +}
  10840. +
  10841. +#define local_trylock(lvar) \
  10842. + ({ \
  10843. + int __locked; \
  10844. + __locked = __local_trylock(&get_local_var(lvar)); \
  10845. + if (!__locked) \
  10846. + put_local_var(lvar); \
  10847. + __locked; \
  10848. + })
  10849. +
  10850. +static inline void __local_unlock(struct local_irq_lock *lv)
  10851. +{
  10852. + LL_WARN(lv->nestcnt == 0);
  10853. + LL_WARN(lv->owner != current);
  10854. + if (--lv->nestcnt)
  10855. + return;
  10856. +
  10857. + lv->owner = NULL;
  10858. + spin_unlock_local(&lv->lock);
  10859. +}
  10860. +
  10861. +#define local_unlock(lvar) \
  10862. + do { \
  10863. + __local_unlock(this_cpu_ptr(&lvar)); \
  10864. + put_local_var(lvar); \
  10865. + } while (0)
  10866. +
  10867. +#define local_unlock_on(lvar, cpu) \
  10868. + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0)
  10869. +
  10870. +static inline void __local_lock_irq(struct local_irq_lock *lv)
  10871. +{
  10872. + spin_lock_irqsave(&lv->lock, lv->flags);
  10873. + LL_WARN(lv->owner);
  10874. + LL_WARN(lv->nestcnt);
  10875. + lv->owner = current;
  10876. + lv->nestcnt = 1;
  10877. +}
  10878. +
  10879. +#define local_lock_irq(lvar) \
  10880. + do { __local_lock_irq(&get_local_var(lvar)); } while (0)
  10881. +
  10882. +#define local_lock_irq_on(lvar, cpu) \
  10883. + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0)
  10884. +
  10885. +static inline void __local_unlock_irq(struct local_irq_lock *lv)
  10886. +{
  10887. + LL_WARN(!lv->nestcnt);
  10888. + LL_WARN(lv->owner != current);
  10889. + lv->owner = NULL;
  10890. + lv->nestcnt = 0;
  10891. + spin_unlock_irq(&lv->lock);
  10892. +}
  10893. +
  10894. +#define local_unlock_irq(lvar) \
  10895. + do { \
  10896. + __local_unlock_irq(this_cpu_ptr(&lvar)); \
  10897. + put_local_var(lvar); \
  10898. + } while (0)
  10899. +
  10900. +#define local_unlock_irq_on(lvar, cpu) \
  10901. + do { \
  10902. + __local_unlock_irq(&per_cpu(lvar, cpu)); \
  10903. + } while (0)
  10904. +
  10905. +static inline int __local_lock_irqsave(struct local_irq_lock *lv)
  10906. +{
  10907. + if (lv->owner != current) {
  10908. + __local_lock_irq(lv);
  10909. + return 0;
  10910. + } else {
  10911. + lv->nestcnt++;
  10912. + return 1;
  10913. + }
  10914. +}
  10915. +
  10916. +#define local_lock_irqsave(lvar, _flags) \
  10917. + do { \
  10918. + if (__local_lock_irqsave(&get_local_var(lvar))) \
  10919. + put_local_var(lvar); \
  10920. + _flags = __this_cpu_read(lvar.flags); \
  10921. + } while (0)
  10922. +
  10923. +#define local_lock_irqsave_on(lvar, _flags, cpu) \
  10924. + do { \
  10925. + __local_lock_irqsave(&per_cpu(lvar, cpu)); \
  10926. + _flags = per_cpu(lvar, cpu).flags; \
  10927. + } while (0)
  10928. +
  10929. +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
  10930. + unsigned long flags)
  10931. +{
  10932. + LL_WARN(!lv->nestcnt);
  10933. + LL_WARN(lv->owner != current);
  10934. + if (--lv->nestcnt)
  10935. + return 0;
  10936. +
  10937. + lv->owner = NULL;
  10938. + spin_unlock_irqrestore(&lv->lock, lv->flags);
  10939. + return 1;
  10940. +}
  10941. +
  10942. +#define local_unlock_irqrestore(lvar, flags) \
  10943. + do { \
  10944. + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \
  10945. + put_local_var(lvar); \
  10946. + } while (0)
  10947. +
  10948. +#define local_unlock_irqrestore_on(lvar, flags, cpu) \
  10949. + do { \
  10950. + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \
  10951. + } while (0)
  10952. +
  10953. +#define local_spin_trylock_irq(lvar, lock) \
  10954. + ({ \
  10955. + int __locked; \
  10956. + local_lock_irq(lvar); \
  10957. + __locked = spin_trylock(lock); \
  10958. + if (!__locked) \
  10959. + local_unlock_irq(lvar); \
  10960. + __locked; \
  10961. + })
  10962. +
  10963. +#define local_spin_lock_irq(lvar, lock) \
  10964. + do { \
  10965. + local_lock_irq(lvar); \
  10966. + spin_lock(lock); \
  10967. + } while (0)
  10968. +
  10969. +#define local_spin_unlock_irq(lvar, lock) \
  10970. + do { \
  10971. + spin_unlock(lock); \
  10972. + local_unlock_irq(lvar); \
  10973. + } while (0)
  10974. +
  10975. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  10976. + do { \
  10977. + local_lock_irqsave(lvar, flags); \
  10978. + spin_lock(lock); \
  10979. + } while (0)
  10980. +
  10981. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  10982. + do { \
  10983. + spin_unlock(lock); \
  10984. + local_unlock_irqrestore(lvar, flags); \
  10985. + } while (0)
  10986. +
  10987. +#define get_locked_var(lvar, var) \
  10988. + (*({ \
  10989. + local_lock(lvar); \
  10990. + this_cpu_ptr(&var); \
  10991. + }))
  10992. +
  10993. +#define put_locked_var(lvar, var) local_unlock(lvar);
  10994. +
  10995. +#define local_lock_cpu(lvar) \
  10996. + ({ \
  10997. + local_lock(lvar); \
  10998. + smp_processor_id(); \
  10999. + })
  11000. +
  11001. +#define local_unlock_cpu(lvar) local_unlock(lvar)
  11002. +
  11003. +#else /* PREEMPT_RT_BASE */
  11004. +
  11005. +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar
  11006. +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar
  11007. +
  11008. +static inline void local_irq_lock_init(int lvar) { }
  11009. +
  11010. +#define local_lock(lvar) preempt_disable()
  11011. +#define local_unlock(lvar) preempt_enable()
  11012. +#define local_lock_irq(lvar) local_irq_disable()
  11013. +#define local_unlock_irq(lvar) local_irq_enable()
  11014. +#define local_lock_irqsave(lvar, flags) local_irq_save(flags)
  11015. +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags)
  11016. +
  11017. +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock)
  11018. +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock)
  11019. +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock)
  11020. +#define local_spin_lock_irqsave(lvar, lock, flags) \
  11021. + spin_lock_irqsave(lock, flags)
  11022. +#define local_spin_unlock_irqrestore(lvar, lock, flags) \
  11023. + spin_unlock_irqrestore(lock, flags)
  11024. +
  11025. +#define get_locked_var(lvar, var) get_cpu_var(var)
  11026. +#define put_locked_var(lvar, var) put_cpu_var(var)
  11027. +
  11028. +#define local_lock_cpu(lvar) get_cpu()
  11029. +#define local_unlock_cpu(lvar) put_cpu()
  11030. +
  11031. +#endif
  11032. +
  11033. +#endif
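
[Note, not part of the patch: locallock.h is the RT substitute for local_irq_*()/get_cpu_var()-style protection of per-CPU data. On RT the macros take a per-CPU sleeping spinlock; on !RT they collapse to the usual preempt/irq disabling. A minimal sketch with hypothetical example_* names:]

    #include <linux/locallock.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned long, example_count);
    static DEFINE_LOCAL_IRQ_LOCK(example_lock);

    static void example_inc(void)
    {
            unsigned long flags;

            local_lock_irqsave(example_lock, flags);
            __this_cpu_inc(example_count);
            local_unlock_irqrestore(example_lock, flags);
    }
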
  11034. diff -Nur linux-4.1.39.orig/include/linux/mm_types.h linux-4.1.39/include/linux/mm_types.h
  11035. --- linux-4.1.39.orig/include/linux/mm_types.h 2017-03-13 21:04:36.000000000 +0100
  11036. +++ linux-4.1.39/include/linux/mm_types.h 2017-04-18 17:56:30.605396820 +0200
  11037. @@ -11,6 +11,7 @@
  11038. #include <linux/completion.h>
  11039. #include <linux/cpumask.h>
  11040. #include <linux/uprobes.h>
  11041. +#include <linux/rcupdate.h>
  11042. #include <linux/page-flags-layout.h>
  11043. #include <asm/page.h>
  11044. #include <asm/mmu.h>
  11045. @@ -453,6 +454,9 @@
  11046. bool tlb_flush_pending;
  11047. #endif
  11048. struct uprobes_state uprobes_state;
  11049. +#ifdef CONFIG_PREEMPT_RT_BASE
  11050. + struct rcu_head delayed_drop;
  11051. +#endif
  11052. #ifdef CONFIG_X86_INTEL_MPX
  11053. /* address of the bounds directory */
  11054. void __user *bd_addr;
  11055. diff -Nur linux-4.1.39.orig/include/linux/module.h linux-4.1.39/include/linux/module.h
  11056. --- linux-4.1.39.orig/include/linux/module.h 2017-03-13 21:04:36.000000000 +0100
  11057. +++ linux-4.1.39/include/linux/module.h 2017-04-18 17:56:30.605396820 +0200
  11058. @@ -386,6 +386,7 @@
  11059. struct module *__module_text_address(unsigned long addr);
  11060. struct module *__module_address(unsigned long addr);
  11061. bool is_module_address(unsigned long addr);
  11062. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr);
  11063. bool is_module_percpu_address(unsigned long addr);
  11064. bool is_module_text_address(unsigned long addr);
  11065. @@ -537,6 +538,11 @@
  11066. {
  11067. return false;
  11068. }
  11069. +
  11070. +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  11071. +{
  11072. + return false;
  11073. +}
  11074. static inline bool is_module_text_address(unsigned long addr)
  11075. {
  11076. diff -Nur linux-4.1.39.orig/include/linux/mutex.h linux-4.1.39/include/linux/mutex.h
  11077. --- linux-4.1.39.orig/include/linux/mutex.h 2017-03-13 21:04:36.000000000 +0100
  11078. +++ linux-4.1.39/include/linux/mutex.h 2017-04-18 17:56:30.605396820 +0200
  11079. @@ -19,6 +19,17 @@
  11080. #include <asm/processor.h>
  11081. #include <linux/osq_lock.h>
  11082. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11083. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  11084. + , .dep_map = { .name = #lockname }
  11085. +#else
  11086. +# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  11087. +#endif
  11088. +
  11089. +#ifdef CONFIG_PREEMPT_RT_FULL
  11090. +# include <linux/mutex_rt.h>
  11091. +#else
  11092. +
  11093. /*
  11094. * Simple, straightforward mutexes with strict semantics:
  11095. *
  11096. @@ -99,13 +110,6 @@
  11097. static inline void mutex_destroy(struct mutex *lock) {}
  11098. #endif
  11099. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11100. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
  11101. - , .dep_map = { .name = #lockname }
  11102. -#else
  11103. -# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
  11104. -#endif
  11105. -
  11106. #define __MUTEX_INITIALIZER(lockname) \
  11107. { .count = ATOMIC_INIT(1) \
  11108. , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
  11109. @@ -173,6 +177,8 @@
  11110. extern int mutex_trylock(struct mutex *lock);
  11111. extern void mutex_unlock(struct mutex *lock);
  11112. +#endif /* !PREEMPT_RT_FULL */
  11113. +
  11114. extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
  11115. #endif /* __LINUX_MUTEX_H */
  11116. diff -Nur linux-4.1.39.orig/include/linux/mutex_rt.h linux-4.1.39/include/linux/mutex_rt.h
  11117. --- linux-4.1.39.orig/include/linux/mutex_rt.h 1970-01-01 01:00:00.000000000 +0100
  11118. +++ linux-4.1.39/include/linux/mutex_rt.h 2017-04-18 17:56:30.605396820 +0200
  11119. @@ -0,0 +1,89 @@
  11120. +#ifndef __LINUX_MUTEX_RT_H
  11121. +#define __LINUX_MUTEX_RT_H
  11122. +
  11123. +#ifndef __LINUX_MUTEX_H
  11124. +#error "Please include mutex.h"
  11125. +#endif
  11126. +
  11127. +#include <linux/rtmutex.h>
  11128. +
  11129. +/* FIXME: Just for __lockfunc */
  11130. +#include <linux/spinlock.h>
  11131. +
  11132. +struct mutex {
  11133. + struct rt_mutex lock;
  11134. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11135. + struct lockdep_map dep_map;
  11136. +#endif
  11137. +};
  11138. +
  11139. +#define __MUTEX_INITIALIZER(mutexname) \
  11140. + { \
  11141. + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \
  11142. + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \
  11143. + }
  11144. +
  11145. +#define DEFINE_MUTEX(mutexname) \
  11146. + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
  11147. +
  11148. +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
  11149. +extern void __lockfunc _mutex_lock(struct mutex *lock);
  11150. +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
  11151. +extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
  11152. +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass);
  11153. +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
  11154. +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass);
  11155. +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass);
  11156. +extern int __lockfunc _mutex_trylock(struct mutex *lock);
  11157. +extern void __lockfunc _mutex_unlock(struct mutex *lock);
  11158. +
  11159. +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock)
  11160. +#define mutex_lock(l) _mutex_lock(l)
  11161. +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l)
  11162. +#define mutex_lock_killable(l) _mutex_lock_killable(l)
  11163. +#define mutex_trylock(l) _mutex_trylock(l)
  11164. +#define mutex_unlock(l) _mutex_unlock(l)
  11165. +
  11166. +#ifdef CONFIG_DEBUG_MUTEXES
  11167. +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock)
  11168. +#else
  11169. +static inline void mutex_destroy(struct mutex *lock) {}
  11170. +#endif
  11171. +
  11172. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11173. +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s)
  11174. +# define mutex_lock_interruptible_nested(l, s) \
  11175. + _mutex_lock_interruptible_nested(l, s)
  11176. +# define mutex_lock_killable_nested(l, s) \
  11177. + _mutex_lock_killable_nested(l, s)
  11178. +
  11179. +# define mutex_lock_nest_lock(lock, nest_lock) \
  11180. +do { \
  11181. + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
  11182. + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
  11183. +} while (0)
  11184. +
  11185. +#else
  11186. +# define mutex_lock_nested(l, s) _mutex_lock(l)
  11187. +# define mutex_lock_interruptible_nested(l, s) \
  11188. + _mutex_lock_interruptible(l)
  11189. +# define mutex_lock_killable_nested(l, s) \
  11190. + _mutex_lock_killable(l)
  11191. +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
  11192. +#endif
  11193. +
  11194. +# define mutex_init(mutex) \
  11195. +do { \
  11196. + static struct lock_class_key __key; \
  11197. + \
  11198. + rt_mutex_init(&(mutex)->lock); \
  11199. + __mutex_do_init((mutex), #mutex, &__key); \
  11200. +} while (0)
  11201. +
  11202. +# define __mutex_init(mutex, name, key) \
  11203. +do { \
  11204. + rt_mutex_init(&(mutex)->lock); \
  11205. + __mutex_do_init((mutex), name, key); \
  11206. +} while (0)
  11207. +
  11208. +#endif
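
[Note, not part of the patch: with PREEMPT_RT_FULL, struct mutex wraps an rt_mutex and the standard mutex API is routed to the _mutex_*() functions above, so callers are unchanged. A minimal sketch with a hypothetical name:]

    static DEFINE_MUTEX(example_mutex);

    static void example(void)
    {
            mutex_lock(&example_mutex);
            /* PI-capable rtmutex on RT, ordinary mutex otherwise */
            mutex_unlock(&example_mutex);
    }
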
  11209. diff -Nur linux-4.1.39.orig/include/linux/netdevice.h linux-4.1.39/include/linux/netdevice.h
  11210. --- linux-4.1.39.orig/include/linux/netdevice.h 2017-03-13 21:04:36.000000000 +0100
  11211. +++ linux-4.1.39/include/linux/netdevice.h 2017-04-18 17:56:30.609396976 +0200
  11212. @@ -390,7 +390,19 @@
  11213. typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
  11214. void __napi_schedule(struct napi_struct *n);
  11215. +
  11216. +/*
  11217. + * When PREEMPT_RT_FULL is defined, all device interrupt handlers
  11218. + * run as threads, and they can also be preempted (without PREEMPT_RT
  11219. + * interrupt threads can not be preempted). Which means that calling
  11220. + * __napi_schedule_irqoff() from an interrupt handler can be preempted
  11221. + * and can corrupt the napi->poll_list.
  11222. + */
  11223. +#ifdef CONFIG_PREEMPT_RT_FULL
  11224. +#define __napi_schedule_irqoff(n) __napi_schedule(n)
  11225. +#else
  11226. void __napi_schedule_irqoff(struct napi_struct *n);
  11227. +#endif
  11228. static inline bool napi_disable_pending(struct napi_struct *n)
  11229. {
  11230. @@ -2215,11 +2227,20 @@
  11231. void synchronize_net(void);
  11232. int init_dummy_netdev(struct net_device *dev);
  11233. +#ifdef CONFIG_PREEMPT_RT_FULL
  11234. +static inline int dev_recursion_level(void)
  11235. +{
  11236. + return current->xmit_recursion;
  11237. +}
  11238. +
  11239. +#else
  11240. +
  11241. DECLARE_PER_CPU(int, xmit_recursion);
  11242. static inline int dev_recursion_level(void)
  11243. {
  11244. return this_cpu_read(xmit_recursion);
  11245. }
  11246. +#endif
  11247. struct net_device *dev_get_by_index(struct net *net, int ifindex);
  11248. struct net_device *__dev_get_by_index(struct net *net, int ifindex);
  11249. @@ -2510,6 +2531,7 @@
  11250. unsigned int dropped;
  11251. struct sk_buff_head input_pkt_queue;
  11252. struct napi_struct backlog;
  11253. + struct sk_buff_head tofree_queue;
  11254. };
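
[Note, not part of the patch: because threaded, preemptible handlers must not touch napi->poll_list unprotected, RT maps __napi_schedule_irqoff() back to __napi_schedule(); a driver's interrupt handler keeps the same call. Illustrative sketch only, with a hypothetical driver private struct:]

    #include <linux/netdevice.h>
    #include <linux/interrupt.h>

    struct example_nic {                    /* hypothetical driver data */
            struct napi_struct napi;
    };

    static irqreturn_t example_nic_irq(int irq, void *dev_id)
    {
            struct example_nic *priv = dev_id;

            /* expands to __napi_schedule() when PREEMPT_RT_FULL is set */
            __napi_schedule_irqoff(&priv->napi);
            return IRQ_HANDLED;
    }
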
  11255. diff -Nur linux-4.1.39.orig/include/linux/netfilter/x_tables.h linux-4.1.39/include/linux/netfilter/x_tables.h
  11256. --- linux-4.1.39.orig/include/linux/netfilter/x_tables.h 2017-03-13 21:04:36.000000000 +0100
  11257. +++ linux-4.1.39/include/linux/netfilter/x_tables.h 2017-04-18 17:56:30.609396976 +0200
  11258. @@ -3,6 +3,7 @@
  11259. #include <linux/netdevice.h>
  11260. +#include <linux/locallock.h>
  11261. #include <uapi/linux/netfilter/x_tables.h>
  11262. /**
  11263. @@ -293,6 +294,8 @@
  11264. */
  11265. DECLARE_PER_CPU(seqcount_t, xt_recseq);
  11266. +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock);
  11267. +
  11268. /**
  11269. * xt_write_recseq_begin - start of a write section
  11270. *
  11271. @@ -307,6 +310,9 @@
  11272. {
  11273. unsigned int addend;
  11274. + /* RT protection */
  11275. + local_lock(xt_write_lock);
  11276. +
  11277. /*
  11278. * Low order bit of sequence is set if we already
  11279. * called xt_write_recseq_begin().
  11280. @@ -337,6 +343,7 @@
  11281. /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
  11282. smp_wmb();
  11283. __this_cpu_add(xt_recseq.sequence, addend);
  11284. + local_unlock(xt_write_lock);
  11285. }
  11286. /*
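
[Note, not part of the patch: xt_write_recseq_begin()/xt_write_recseq_end() already bracket the per-CPU seqcount; the hunk adds xt_write_lock so the section stays serialized when softirqs are preemptible on RT. Callers keep the documented pattern; a sketch with a hypothetical function name:]

    static void example_table_update(void)
    {
            unsigned int addend;

            local_bh_disable();
            addend = xt_write_recseq_begin();   /* takes xt_write_lock on RT */
            /* ... update per-CPU xt counters / walk the table ... */
            xt_write_recseq_end(addend);
            local_bh_enable();
    }
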
  11287. diff -Nur linux-4.1.39.orig/include/linux/notifier.h linux-4.1.39/include/linux/notifier.h
  11288. --- linux-4.1.39.orig/include/linux/notifier.h 2017-03-13 21:04:36.000000000 +0100
  11289. +++ linux-4.1.39/include/linux/notifier.h 2017-04-18 17:56:30.609396976 +0200
  11290. @@ -6,7 +6,7 @@
  11291. *
  11292. * Alan Cox <Alan.Cox@linux.org>
  11293. */
  11294. -
  11295. +
  11296. #ifndef _LINUX_NOTIFIER_H
  11297. #define _LINUX_NOTIFIER_H
  11298. #include <linux/errno.h>
  11299. @@ -42,9 +42,7 @@
  11300. * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  11301. * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  11302. * SRCU notifier chains should be used when the chain will be called very
  11303. - * often but notifier_blocks will seldom be removed. Also, SRCU notifier
  11304. - * chains are slightly more difficult to use because they require special
  11305. - * runtime initialization.
  11306. + * often but notifier_blocks will seldom be removed.
  11307. */
  11308. typedef int (*notifier_fn_t)(struct notifier_block *nb,
  11309. @@ -88,7 +86,7 @@
  11310. (name)->head = NULL; \
  11311. } while (0)
  11312. -/* srcu_notifier_heads must be initialized and cleaned up dynamically */
  11313. +/* srcu_notifier_heads must be cleaned up dynamically */
  11314. extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
  11315. #define srcu_cleanup_notifier_head(name) \
  11316. cleanup_srcu_struct(&(name)->srcu);
  11317. @@ -101,7 +99,13 @@
  11318. .head = NULL }
  11319. #define RAW_NOTIFIER_INIT(name) { \
  11320. .head = NULL }
  11321. -/* srcu_notifier_heads cannot be initialized statically */
  11322. +
  11323. +#define SRCU_NOTIFIER_INIT(name, pcpu) \
  11324. + { \
  11325. + .mutex = __MUTEX_INITIALIZER(name.mutex), \
  11326. + .head = NULL, \
  11327. + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \
  11328. + }
  11329. #define ATOMIC_NOTIFIER_HEAD(name) \
  11330. struct atomic_notifier_head name = \
  11331. @@ -113,6 +117,18 @@
  11332. struct raw_notifier_head name = \
  11333. RAW_NOTIFIER_INIT(name)
  11334. +#define _SRCU_NOTIFIER_HEAD(name, mod) \
  11335. + static DEFINE_PER_CPU(struct srcu_struct_array, \
  11336. + name##_head_srcu_array); \
  11337. + mod struct srcu_notifier_head name = \
  11338. + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array)
  11339. +
  11340. +#define SRCU_NOTIFIER_HEAD(name) \
  11341. + _SRCU_NOTIFIER_HEAD(name, )
  11342. +
  11343. +#define SRCU_NOTIFIER_HEAD_STATIC(name) \
  11344. + _SRCU_NOTIFIER_HEAD(name, static)
  11345. +
  11346. #ifdef __KERNEL__
  11347. extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
  11348. @@ -182,12 +198,12 @@
  11349. /*
  11350. * Declared notifiers so far. I can imagine quite a few more chains
  11351. - * over time (eg laptop power reset chains, reboot chain (to clean
  11352. + * over time (eg laptop power reset chains, reboot chain (to clean
  11353. * device units up), device [un]mount chain, module load/unload chain,
  11354. - * low memory chain, screenblank chain (for plug in modular screenblankers)
  11355. + * low memory chain, screenblank chain (for plug in modular screenblankers)
  11356. * VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  11357. */
  11358. -
  11359. +
  11360. /* CPU notfiers are defined in include/linux/cpu.h. */
  11361. /* netdevice notifiers are defined in include/linux/netdevice.h */
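
[Note, not part of the patch: with __SRCU_STRUCT_INIT() usable here, SRCU notifier heads can now be defined statically instead of requiring srcu_init_notifier_head() at runtime. A minimal sketch with hypothetical example_* names:]

    static int example_event(struct notifier_block *nb,
                             unsigned long action, void *data)
    {
            return NOTIFY_OK;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_event,
    };

    SRCU_NOTIFIER_HEAD_STATIC(example_chain);   /* no runtime init needed */

    static void example_notify(void)
    {
            srcu_notifier_chain_register(&example_chain, &example_nb);
            srcu_notifier_call_chain(&example_chain, 0, NULL);
    }
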
  11362. diff -Nur linux-4.1.39.orig/include/linux/percpu.h linux-4.1.39/include/linux/percpu.h
  11363. --- linux-4.1.39.orig/include/linux/percpu.h 2017-03-13 21:04:36.000000000 +0100
  11364. +++ linux-4.1.39/include/linux/percpu.h 2017-04-18 17:56:30.609396976 +0200
  11365. @@ -24,6 +24,35 @@
  11366. PERCPU_MODULE_RESERVE)
  11367. #endif
  11368. +#ifdef CONFIG_PREEMPT_RT_FULL
  11369. +
  11370. +#define get_local_var(var) (*({ \
  11371. + migrate_disable(); \
  11372. + this_cpu_ptr(&var); }))
  11373. +
  11374. +#define put_local_var(var) do { \
  11375. + (void)&(var); \
  11376. + migrate_enable(); \
  11377. +} while (0)
  11378. +
  11379. +# define get_local_ptr(var) ({ \
  11380. + migrate_disable(); \
  11381. + this_cpu_ptr(var); })
  11382. +
  11383. +# define put_local_ptr(var) do { \
  11384. + (void)(var); \
  11385. + migrate_enable(); \
  11386. +} while (0)
  11387. +
  11388. +#else
  11389. +
  11390. +#define get_local_var(var) get_cpu_var(var)
  11391. +#define put_local_var(var) put_cpu_var(var)
  11392. +#define get_local_ptr(var) get_cpu_ptr(var)
  11393. +#define put_local_ptr(var) put_cpu_ptr(var)
  11394. +
  11395. +#endif
  11396. +
  11397. /* minimum unit size, also is the maximum supported allocation size */
  11398. #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
  11399. @@ -116,6 +145,7 @@
  11400. #endif
  11401. extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
  11402. +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
  11403. extern bool is_kernel_percpu_address(unsigned long addr);
  11404. #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
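
[Note, not part of the patch: get_local_var()/put_local_var() replace get_cpu_var()/put_cpu_var() where RT only needs migration disabled, not preemption. A minimal sketch with hypothetical example_* names:]

    #include <linux/percpu.h>
    #include <linux/llist.h>

    static DEFINE_PER_CPU(struct llist_head, example_lists);

    static void example_add(struct llist_node *n)
    {
            struct llist_head *head = &get_local_var(example_lists);

            /* migration is disabled; on RT the task stays preemptible */
            llist_add(n, head);
            put_local_var(example_lists);
    }
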
  11405. diff -Nur linux-4.1.39.orig/include/linux/pid.h linux-4.1.39/include/linux/pid.h
  11406. --- linux-4.1.39.orig/include/linux/pid.h 2017-03-13 21:04:36.000000000 +0100
  11407. +++ linux-4.1.39/include/linux/pid.h 2017-04-18 17:56:30.609396976 +0200
  11408. @@ -2,6 +2,7 @@
  11409. #define _LINUX_PID_H
  11410. #include <linux/rcupdate.h>
  11411. +#include <linux/atomic.h>
  11412. enum pid_type
  11413. {
  11414. diff -Nur linux-4.1.39.orig/include/linux/platform_data/gpio-omap.h linux-4.1.39/include/linux/platform_data/gpio-omap.h
  11415. --- linux-4.1.39.orig/include/linux/platform_data/gpio-omap.h 2017-03-13 21:04:36.000000000 +0100
  11416. +++ linux-4.1.39/include/linux/platform_data/gpio-omap.h 2017-04-18 17:56:30.609396976 +0200
  11417. @@ -198,7 +198,6 @@
  11418. int bank_width; /* GPIO bank width */
  11419. int bank_stride; /* Only needed for omap1 MPUIO */
  11420. bool dbck_flag; /* dbck required or not - True for OMAP3&4 */
  11421. - bool loses_context; /* whether the bank would ever lose context */
  11422. bool is_mpuio; /* whether the bank is of type MPUIO */
  11423. u32 non_wakeup_gpios;
  11424. @@ -208,9 +207,17 @@
  11425. int (*get_context_loss_count)(struct device *dev);
  11426. };
  11427. +#if IS_BUILTIN(CONFIG_GPIO_OMAP)
  11428. extern void omap2_gpio_prepare_for_idle(int off_mode);
  11429. extern void omap2_gpio_resume_after_idle(void);
  11430. -extern void omap_set_gpio_debounce(int gpio, int enable);
  11431. -extern void omap_set_gpio_debounce_time(int gpio, int enable);
  11432. +#else
  11433. +static inline void omap2_gpio_prepare_for_idle(int off_mode)
  11434. +{
  11435. +}
  11436. +
  11437. +static inline void omap2_gpio_resume_after_idle(void)
  11438. +{
  11439. +}
  11440. +#endif
  11441. #endif
  11442. diff -Nur linux-4.1.39.orig/include/linux/preempt.h linux-4.1.39/include/linux/preempt.h
  11443. --- linux-4.1.39.orig/include/linux/preempt.h 2017-03-13 21:04:36.000000000 +0100
  11444. +++ linux-4.1.39/include/linux/preempt.h 2017-04-18 17:56:30.609396976 +0200
  11445. @@ -34,6 +34,20 @@
  11446. #define preempt_count_inc() preempt_count_add(1)
  11447. #define preempt_count_dec() preempt_count_sub(1)
  11448. +#ifdef CONFIG_PREEMPT_LAZY
  11449. +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
  11450. +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
  11451. +#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
  11452. +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
  11453. +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
  11454. +#else
  11455. +#define add_preempt_lazy_count(val) do { } while (0)
  11456. +#define sub_preempt_lazy_count(val) do { } while (0)
  11457. +#define inc_preempt_lazy_count() do { } while (0)
  11458. +#define dec_preempt_lazy_count() do { } while (0)
  11459. +#define preempt_lazy_count() (0)
  11460. +#endif
  11461. +
  11462. #ifdef CONFIG_PREEMPT_COUNT
  11463. #define preempt_disable() \
  11464. @@ -42,13 +56,25 @@
  11465. barrier(); \
  11466. } while (0)
  11467. +#define preempt_lazy_disable() \
  11468. +do { \
  11469. + inc_preempt_lazy_count(); \
  11470. + barrier(); \
  11471. +} while (0)
  11472. +
  11473. #define sched_preempt_enable_no_resched() \
  11474. do { \
  11475. barrier(); \
  11476. preempt_count_dec(); \
  11477. } while (0)
  11478. -#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  11479. +#ifdef CONFIG_PREEMPT_RT_BASE
  11480. +# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
  11481. +# define preempt_check_resched_rt() preempt_check_resched()
  11482. +#else
  11483. +# define preempt_enable_no_resched() preempt_enable()
  11484. +# define preempt_check_resched_rt() barrier();
  11485. +#endif
  11486. #ifdef CONFIG_PREEMPT
  11487. #define preempt_enable() \
  11488. @@ -64,6 +90,13 @@
  11489. __preempt_schedule(); \
  11490. } while (0)
  11491. +#define preempt_lazy_enable() \
  11492. +do { \
  11493. + dec_preempt_lazy_count(); \
  11494. + barrier(); \
  11495. + preempt_check_resched(); \
  11496. +} while (0)
  11497. +
  11498. #else
  11499. #define preempt_enable() \
  11500. do { \
  11501. @@ -122,6 +155,7 @@
  11502. #define preempt_disable_notrace() barrier()
  11503. #define preempt_enable_no_resched_notrace() barrier()
  11504. #define preempt_enable_notrace() barrier()
  11505. +#define preempt_check_resched_rt() barrier()
  11506. #endif /* CONFIG_PREEMPT_COUNT */
  11507. @@ -141,10 +175,31 @@
  11508. } while (0)
  11509. #define preempt_fold_need_resched() \
  11510. do { \
  11511. - if (tif_need_resched()) \
  11512. + if (tif_need_resched_now()) \
  11513. set_preempt_need_resched(); \
  11514. } while (0)
  11515. +#ifdef CONFIG_PREEMPT_RT_FULL
  11516. +# define preempt_disable_rt() preempt_disable()
  11517. +# define preempt_enable_rt() preempt_enable()
  11518. +# define preempt_disable_nort() barrier()
  11519. +# define preempt_enable_nort() barrier()
  11520. +# ifdef CONFIG_SMP
  11521. + extern void migrate_disable(void);
  11522. + extern void migrate_enable(void);
  11523. +# else /* CONFIG_SMP */
  11524. +# define migrate_disable() barrier()
  11525. +# define migrate_enable() barrier()
  11526. +# endif /* CONFIG_SMP */
  11527. +#else
  11528. +# define preempt_disable_rt() barrier()
  11529. +# define preempt_enable_rt() barrier()
  11530. +# define preempt_disable_nort() preempt_disable()
  11531. +# define preempt_enable_nort() preempt_enable()
  11532. +# define migrate_disable() preempt_disable()
  11533. +# define migrate_enable() preempt_enable()
  11534. +#endif
  11535. +
  11536. #ifdef CONFIG_PREEMPT_NOTIFIERS
  11537. struct preempt_notifier;
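
[Note, not part of the patch: migrate_disable()/migrate_enable() pin the task to its current CPU without disabling preemption on RT; on !RT they fall back to preempt_disable()/preempt_enable(). A minimal pairing sketch:]

    static void example(void)
    {
            migrate_disable();
            /*
             * The task cannot move to another CPU here.  On
             * PREEMPT_RT_FULL it remains preemptible, so concurrent
             * writers on this CPU still need their own serialization.
             */
            migrate_enable();
    }
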
  11538. diff -Nur linux-4.1.39.orig/include/linux/preempt_mask.h linux-4.1.39/include/linux/preempt_mask.h
  11539. --- linux-4.1.39.orig/include/linux/preempt_mask.h 2017-03-13 21:04:36.000000000 +0100
  11540. +++ linux-4.1.39/include/linux/preempt_mask.h 2017-04-18 17:56:30.609396976 +0200
  11541. @@ -44,16 +44,26 @@
  11542. #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
  11543. #define NMI_OFFSET (1UL << NMI_SHIFT)
  11544. -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  11545. +#ifndef CONFIG_PREEMPT_RT_FULL
  11546. +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
  11547. +#else
  11548. +# define SOFTIRQ_DISABLE_OFFSET (0)
  11549. +#endif
  11550. #define PREEMPT_ACTIVE_BITS 1
  11551. #define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
  11552. #define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
  11553. #define hardirq_count() (preempt_count() & HARDIRQ_MASK)
  11554. -#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  11555. #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
  11556. | NMI_MASK))
  11557. +#ifndef CONFIG_PREEMPT_RT_FULL
  11558. +# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
  11559. +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  11560. +#else
  11561. +# define softirq_count() (0UL)
  11562. +extern int in_serving_softirq(void);
  11563. +#endif
  11564. /*
  11565. * Are we doing bottom half or hardware interrupt processing?
  11566. @@ -64,7 +74,6 @@
  11567. #define in_irq() (hardirq_count())
  11568. #define in_softirq() (softirq_count())
  11569. #define in_interrupt() (irq_count())
  11570. -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
  11571. /*
  11572. * Are we in NMI context?
  11573. @@ -83,7 +92,11 @@
  11574. /*
  11575. * The preempt_count offset after spin_lock()
  11576. */
  11577. +#if !defined(CONFIG_PREEMPT_RT_FULL)
  11578. #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
  11579. +#else
  11580. +#define PREEMPT_LOCK_OFFSET 0
  11581. +#endif
  11582. /*
  11583. * The preempt_count offset needed for things like:
  11584. diff -Nur linux-4.1.39.orig/include/linux/printk.h linux-4.1.39/include/linux/printk.h
  11585. --- linux-4.1.39.orig/include/linux/printk.h 2017-03-13 21:04:36.000000000 +0100
  11586. +++ linux-4.1.39/include/linux/printk.h 2017-04-18 17:56:30.609396976 +0200
  11587. @@ -115,9 +115,11 @@
  11588. #ifdef CONFIG_EARLY_PRINTK
  11589. extern asmlinkage __printf(1, 2)
  11590. void early_printk(const char *fmt, ...);
  11591. +extern void printk_kill(void);
  11592. #else
  11593. static inline __printf(1, 2) __cold
  11594. void early_printk(const char *s, ...) { }
  11595. +static inline void printk_kill(void) { }
  11596. #endif
  11597. typedef int(*printk_func_t)(const char *fmt, va_list args);
  11598. diff -Nur linux-4.1.39.orig/include/linux/radix-tree.h linux-4.1.39/include/linux/radix-tree.h
  11599. --- linux-4.1.39.orig/include/linux/radix-tree.h 2017-03-13 21:04:36.000000000 +0100
  11600. +++ linux-4.1.39/include/linux/radix-tree.h 2017-04-18 17:56:30.609396976 +0200
  11601. @@ -279,6 +279,8 @@
  11602. unsigned long first_index, unsigned int max_items);
  11603. int radix_tree_preload(gfp_t gfp_mask);
  11604. int radix_tree_maybe_preload(gfp_t gfp_mask);
  11605. +void radix_tree_preload_end(void);
  11606. +
  11607. void radix_tree_init(void);
  11608. void *radix_tree_tag_set(struct radix_tree_root *root,
  11609. unsigned long index, unsigned int tag);
  11610. @@ -301,11 +303,6 @@
  11611. int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
  11612. unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
  11613. -static inline void radix_tree_preload_end(void)
  11614. -{
  11615. - preempt_enable();
  11616. -}
  11617. -
  11618. /**
  11619. * struct radix_tree_iter - radix tree iterator state
  11620. *
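
[Note, not part of the patch: radix_tree_preload_end() moves out of line so the RT implementation can drop a local lock instead of calling preempt_enable(); the preload pattern for callers is unchanged. A minimal sketch with hypothetical example_* names:]

    static RADIX_TREE(example_tree, GFP_ATOMIC);
    static DEFINE_SPINLOCK(example_lock);

    static int example_insert(unsigned long index, void *item)
    {
            int err;

            err = radix_tree_preload(GFP_KERNEL);
            if (err)
                    return err;

            spin_lock(&example_lock);
            err = radix_tree_insert(&example_tree, index, item);
            spin_unlock(&example_lock);

            radix_tree_preload_end();
            return err;
    }
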
  11621. diff -Nur linux-4.1.39.orig/include/linux/random.h linux-4.1.39/include/linux/random.h
  11622. --- linux-4.1.39.orig/include/linux/random.h 2017-03-13 21:04:36.000000000 +0100
  11623. +++ linux-4.1.39/include/linux/random.h 2017-04-18 17:56:30.609396976 +0200
  11624. @@ -11,7 +11,7 @@
  11625. extern void add_device_randomness(const void *, unsigned int);
  11626. extern void add_input_randomness(unsigned int type, unsigned int code,
  11627. unsigned int value);
  11628. -extern void add_interrupt_randomness(int irq, int irq_flags);
  11629. +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
  11630. extern void get_random_bytes(void *buf, int nbytes);
  11631. extern void get_random_bytes_arch(void *buf, int nbytes);
  11632. diff -Nur linux-4.1.39.orig/include/linux/rcupdate.h linux-4.1.39/include/linux/rcupdate.h
  11633. --- linux-4.1.39.orig/include/linux/rcupdate.h 2017-03-13 21:04:36.000000000 +0100
  11634. +++ linux-4.1.39/include/linux/rcupdate.h 2017-04-18 17:56:30.609396976 +0200
  11635. @@ -167,6 +167,9 @@
  11636. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  11637. +#ifdef CONFIG_PREEMPT_RT_FULL
  11638. +#define call_rcu_bh call_rcu
  11639. +#else
  11640. /**
  11641. * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
  11642. * @head: structure to be used for queueing the RCU updates.
  11643. @@ -190,6 +193,7 @@
  11644. */
  11645. void call_rcu_bh(struct rcu_head *head,
  11646. void (*func)(struct rcu_head *head));
  11647. +#endif
  11648. /**
  11649. * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
  11650. @@ -260,6 +264,11 @@
  11651. * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
  11652. */
  11653. #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
  11654. +#ifndef CONFIG_PREEMPT_RT_FULL
  11655. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  11656. +#else
  11657. +static inline int sched_rcu_preempt_depth(void) { return 0; }
  11658. +#endif
  11659. #else /* #ifdef CONFIG_PREEMPT_RCU */
  11660. @@ -283,6 +292,8 @@
  11661. return 0;
  11662. }
  11663. +#define sched_rcu_preempt_depth() rcu_preempt_depth()
  11664. +
  11665. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  11666. /* Internal to kernel */
  11667. @@ -463,7 +474,14 @@
  11668. int debug_lockdep_rcu_enabled(void);
  11669. int rcu_read_lock_held(void);
  11670. +#ifdef CONFIG_PREEMPT_RT_FULL
  11671. +static inline int rcu_read_lock_bh_held(void)
  11672. +{
  11673. + return rcu_read_lock_held();
  11674. +}
  11675. +#else
  11676. int rcu_read_lock_bh_held(void);
  11677. +#endif
  11678. /**
  11679. * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  11680. @@ -990,10 +1008,14 @@
  11681. static inline void rcu_read_lock_bh(void)
  11682. {
  11683. local_bh_disable();
  11684. +#ifdef CONFIG_PREEMPT_RT_FULL
  11685. + rcu_read_lock();
  11686. +#else
  11687. __acquire(RCU_BH);
  11688. rcu_lock_acquire(&rcu_bh_lock_map);
  11689. rcu_lockdep_assert(rcu_is_watching(),
  11690. "rcu_read_lock_bh() used illegally while idle");
  11691. +#endif
  11692. }
  11693. /*
  11694. @@ -1003,10 +1025,14 @@
  11695. */
  11696. static inline void rcu_read_unlock_bh(void)
  11697. {
  11698. +#ifdef CONFIG_PREEMPT_RT_FULL
  11699. + rcu_read_unlock();
  11700. +#else
  11701. rcu_lockdep_assert(rcu_is_watching(),
  11702. "rcu_read_unlock_bh() used illegally while idle");
  11703. rcu_lock_release(&rcu_bh_lock_map);
  11704. __release(RCU_BH);
  11705. +#endif
  11706. local_bh_enable();
  11707. }
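
A usage sketch for the BH read side remapped above; on PREEMPT_RT_FULL rcu_read_lock_bh() takes a plain rcu_read_lock() and call_rcu_bh is aliased to call_rcu. The demo_* names are hypothetical:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
        int value;
        struct rcu_head rcu;
};

static struct demo_obj __rcu *demo_ptr;

static int demo_read(void)
{
        struct demo_obj *p;
        int val = -1;

        rcu_read_lock_bh();             /* plain rcu_read_lock() on RT */
        p = rcu_dereference_bh(demo_ptr);
        if (p)
                val = p->value;
        rcu_read_unlock_bh();
        return val;
}

static void demo_free(struct rcu_head *head)
{
        kfree(container_of(head, struct demo_obj, rcu));
}

static void demo_retire(struct demo_obj *old)
{
        call_rcu_bh(&old->rcu, demo_free);      /* call_rcu on RT */
}
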
  11708. diff -Nur linux-4.1.39.orig/include/linux/rcutree.h linux-4.1.39/include/linux/rcutree.h
  11709. --- linux-4.1.39.orig/include/linux/rcutree.h 2017-03-13 21:04:36.000000000 +0100
  11710. +++ linux-4.1.39/include/linux/rcutree.h 2017-04-18 17:56:30.609396976 +0200
  11711. @@ -46,7 +46,11 @@
  11712. rcu_note_context_switch();
  11713. }
  11714. +#ifdef CONFIG_PREEMPT_RT_FULL
  11715. +# define synchronize_rcu_bh synchronize_rcu
  11716. +#else
  11717. void synchronize_rcu_bh(void);
  11718. +#endif
  11719. void synchronize_sched_expedited(void);
  11720. void synchronize_rcu_expedited(void);
  11721. @@ -74,7 +78,11 @@
  11722. }
  11723. void rcu_barrier(void);
  11724. +#ifdef CONFIG_PREEMPT_RT_FULL
  11725. +# define rcu_barrier_bh rcu_barrier
  11726. +#else
  11727. void rcu_barrier_bh(void);
  11728. +#endif
  11729. void rcu_barrier_sched(void);
  11730. unsigned long get_state_synchronize_rcu(void);
  11731. void cond_synchronize_rcu(unsigned long oldstate);
  11732. @@ -85,12 +93,10 @@
  11733. unsigned long rcu_batches_started_bh(void);
  11734. unsigned long rcu_batches_started_sched(void);
  11735. unsigned long rcu_batches_completed(void);
  11736. -unsigned long rcu_batches_completed_bh(void);
  11737. unsigned long rcu_batches_completed_sched(void);
  11738. void show_rcu_gp_kthreads(void);
  11739. void rcu_force_quiescent_state(void);
  11740. -void rcu_bh_force_quiescent_state(void);
  11741. void rcu_sched_force_quiescent_state(void);
  11742. void exit_rcu(void);
  11743. @@ -100,6 +106,14 @@
  11744. bool rcu_is_watching(void);
  11745. +#ifndef CONFIG_PREEMPT_RT_FULL
  11746. +void rcu_bh_force_quiescent_state(void);
  11747. +unsigned long rcu_batches_completed_bh(void);
  11748. +#else
  11749. +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state
  11750. +# define rcu_batches_completed_bh rcu_batches_completed
  11751. +#endif
  11752. +
  11753. void rcu_all_qs(void);
  11754. #endif /* __LINUX_RCUTREE_H */
  11755. diff -Nur linux-4.1.39.orig/include/linux/rtmutex.h linux-4.1.39/include/linux/rtmutex.h
  11756. --- linux-4.1.39.orig/include/linux/rtmutex.h 2017-03-13 21:04:36.000000000 +0100
  11757. +++ linux-4.1.39/include/linux/rtmutex.h 2017-04-18 17:56:30.609396976 +0200
  11758. @@ -14,10 +14,14 @@
  11759. #include <linux/linkage.h>
  11760. #include <linux/rbtree.h>
  11761. -#include <linux/spinlock_types.h>
  11762. +#include <linux/spinlock_types_raw.h>
  11763. extern int max_lock_depth; /* for sysctl */
  11764. +#ifdef CONFIG_DEBUG_MUTEXES
  11765. +#include <linux/debug_locks.h>
  11766. +#endif
  11767. +
  11768. /**
  11769. * The rt_mutex structure
  11770. *
  11771. @@ -31,8 +35,8 @@
  11772. struct rb_root waiters;
  11773. struct rb_node *waiters_leftmost;
  11774. struct task_struct *owner;
  11775. -#ifdef CONFIG_DEBUG_RT_MUTEXES
  11776. int save_state;
  11777. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  11778. const char *name, *file;
  11779. int line;
  11780. void *magic;
  11781. @@ -55,22 +59,33 @@
  11782. # define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
  11783. #endif
  11784. +# define rt_mutex_init(mutex) \
  11785. + do { \
  11786. + raw_spin_lock_init(&(mutex)->wait_lock); \
  11787. + __rt_mutex_init(mutex, #mutex); \
  11788. + } while (0)
  11789. +
  11790. #ifdef CONFIG_DEBUG_RT_MUTEXES
  11791. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
  11792. , .name = #mutexname, .file = __FILE__, .line = __LINE__
  11793. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__)
  11794. extern void rt_mutex_debug_task_free(struct task_struct *tsk);
  11795. #else
  11796. # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  11797. -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL)
  11798. # define rt_mutex_debug_task_free(t) do { } while (0)
  11799. #endif
  11800. -#define __RT_MUTEX_INITIALIZER(mutexname) \
  11801. - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  11802. +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  11803. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
  11804. , .waiters = RB_ROOT \
  11805. , .owner = NULL \
  11806. - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)}
  11807. + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
  11808. +
  11809. +#define __RT_MUTEX_INITIALIZER(mutexname) \
  11810. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
  11811. +
  11812. +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
  11813. + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
  11814. + , .save_state = 1 }
  11815. #define DEFINE_RT_MUTEX(mutexname) \
  11816. struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
  11817. @@ -91,6 +106,7 @@
  11818. extern void rt_mutex_lock(struct rt_mutex *lock);
  11819. extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
  11820. +extern int rt_mutex_lock_killable(struct rt_mutex *lock);
  11821. extern int rt_mutex_timed_lock(struct rt_mutex *lock,
  11822. struct hrtimer_sleeper *timeout);
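
A small sketch of the rt_mutex API extended above, using the newly declared rt_mutex_lock_killable(); the demo names are hypothetical:

#include <linux/rtmutex.h>

static DEFINE_RT_MUTEX(demo_rtm);       /* hypothetical lock */
static int demo_state;

static int demo_update(int v)
{
        int ret;

        /* New variant: gives up when a fatal signal is pending. */
        ret = rt_mutex_lock_killable(&demo_rtm);
        if (ret)
                return ret;

        demo_state = v;
        rt_mutex_unlock(&demo_rtm);
        return 0;
}
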
  11823. diff -Nur linux-4.1.39.orig/include/linux/rwlock_rt.h linux-4.1.39/include/linux/rwlock_rt.h
  11824. --- linux-4.1.39.orig/include/linux/rwlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  11825. +++ linux-4.1.39/include/linux/rwlock_rt.h 2017-04-18 17:56:30.609396976 +0200
  11826. @@ -0,0 +1,99 @@
  11827. +#ifndef __LINUX_RWLOCK_RT_H
  11828. +#define __LINUX_RWLOCK_RT_H
  11829. +
  11830. +#ifndef __LINUX_SPINLOCK_H
  11831. +#error Do not include directly. Use spinlock.h
  11832. +#endif
  11833. +
  11834. +#define rwlock_init(rwl) \
  11835. +do { \
  11836. + static struct lock_class_key __key; \
  11837. + \
  11838. + rt_mutex_init(&(rwl)->lock); \
  11839. + __rt_rwlock_init(rwl, #rwl, &__key); \
  11840. +} while (0)
  11841. +
  11842. +extern void __lockfunc rt_write_lock(rwlock_t *rwlock);
  11843. +extern void __lockfunc rt_read_lock(rwlock_t *rwlock);
  11844. +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock);
  11845. +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags);
  11846. +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock);
  11847. +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock);
  11848. +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock);
  11849. +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock);
  11850. +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock);
  11851. +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key);
  11852. +
  11853. +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock))
  11854. +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock))
  11855. +
  11856. +#define write_trylock_irqsave(lock, flags) \
  11857. + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags))
  11858. +
  11859. +#define read_lock_irqsave(lock, flags) \
  11860. + do { \
  11861. + typecheck(unsigned long, flags); \
  11862. + flags = rt_read_lock_irqsave(lock); \
  11863. + } while (0)
  11864. +
  11865. +#define write_lock_irqsave(lock, flags) \
  11866. + do { \
  11867. + typecheck(unsigned long, flags); \
  11868. + flags = rt_write_lock_irqsave(lock); \
  11869. + } while (0)
  11870. +
  11871. +#define read_lock(lock) rt_read_lock(lock)
  11872. +
  11873. +#define read_lock_bh(lock) \
  11874. + do { \
  11875. + local_bh_disable(); \
  11876. + rt_read_lock(lock); \
  11877. + } while (0)
  11878. +
  11879. +#define read_lock_irq(lock) read_lock(lock)
  11880. +
  11881. +#define write_lock(lock) rt_write_lock(lock)
  11882. +
  11883. +#define write_lock_bh(lock) \
  11884. + do { \
  11885. + local_bh_disable(); \
  11886. + rt_write_lock(lock); \
  11887. + } while (0)
  11888. +
  11889. +#define write_lock_irq(lock) write_lock(lock)
  11890. +
  11891. +#define read_unlock(lock) rt_read_unlock(lock)
  11892. +
  11893. +#define read_unlock_bh(lock) \
  11894. + do { \
  11895. + rt_read_unlock(lock); \
  11896. + local_bh_enable(); \
  11897. + } while (0)
  11898. +
  11899. +#define read_unlock_irq(lock) read_unlock(lock)
  11900. +
  11901. +#define write_unlock(lock) rt_write_unlock(lock)
  11902. +
  11903. +#define write_unlock_bh(lock) \
  11904. + do { \
  11905. + rt_write_unlock(lock); \
  11906. + local_bh_enable(); \
  11907. + } while (0)
  11908. +
  11909. +#define write_unlock_irq(lock) write_unlock(lock)
  11910. +
  11911. +#define read_unlock_irqrestore(lock, flags) \
  11912. + do { \
  11913. + typecheck(unsigned long, flags); \
  11914. + (void) flags; \
  11915. + rt_read_unlock(lock); \
  11916. + } while (0)
  11917. +
  11918. +#define write_unlock_irqrestore(lock, flags) \
  11919. + do { \
  11920. + typecheck(unsigned long, flags); \
  11921. + (void) flags; \
  11922. + rt_write_unlock(lock); \
  11923. + } while (0)
  11924. +
  11925. +#endif
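
A short usage sketch against the rwlock mapping introduced by this new header: the lock is an rt_mutex underneath and the irqsave flags are kept only for API compatibility. The demo names are hypothetical:

#include <linux/spinlock.h>     /* pulls in rwlock_rt.h when PREEMPT_RT_FULL=y */

static DEFINE_RWLOCK(demo_rwlock);
static int demo_value;

static int demo_get(void)
{
        int v;

        read_lock(&demo_rwlock);        /* rt_read_lock() on RT */
        v = demo_value;
        read_unlock(&demo_rwlock);
        return v;
}

static void demo_set(int v)
{
        unsigned long flags;

        /* flags is written but interrupts stay enabled on RT */
        write_lock_irqsave(&demo_rwlock, flags);
        demo_value = v;
        write_unlock_irqrestore(&demo_rwlock, flags);
}
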
  11926. diff -Nur linux-4.1.39.orig/include/linux/rwlock_types.h linux-4.1.39/include/linux/rwlock_types.h
  11927. --- linux-4.1.39.orig/include/linux/rwlock_types.h 2017-03-13 21:04:36.000000000 +0100
  11928. +++ linux-4.1.39/include/linux/rwlock_types.h 2017-04-18 17:56:30.609396976 +0200
  11929. @@ -1,6 +1,10 @@
  11930. #ifndef __LINUX_RWLOCK_TYPES_H
  11931. #define __LINUX_RWLOCK_TYPES_H
  11932. +#if !defined(__LINUX_SPINLOCK_TYPES_H)
  11933. +# error "Do not include directly, include spinlock_types.h"
  11934. +#endif
  11935. +
  11936. /*
  11937. * include/linux/rwlock_types.h - generic rwlock type definitions
  11938. * and initializers
  11939. @@ -43,6 +47,7 @@
  11940. RW_DEP_MAP_INIT(lockname) }
  11941. #endif
  11942. -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
  11943. +#define DEFINE_RWLOCK(name) \
  11944. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  11945. #endif /* __LINUX_RWLOCK_TYPES_H */
  11946. diff -Nur linux-4.1.39.orig/include/linux/rwlock_types_rt.h linux-4.1.39/include/linux/rwlock_types_rt.h
  11947. --- linux-4.1.39.orig/include/linux/rwlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  11948. +++ linux-4.1.39/include/linux/rwlock_types_rt.h 2017-04-18 17:56:30.609396976 +0200
  11949. @@ -0,0 +1,33 @@
  11950. +#ifndef __LINUX_RWLOCK_TYPES_RT_H
  11951. +#define __LINUX_RWLOCK_TYPES_RT_H
  11952. +
  11953. +#ifndef __LINUX_SPINLOCK_TYPES_H
  11954. +#error "Do not include directly. Include spinlock_types.h instead"
  11955. +#endif
  11956. +
  11957. +/*
  11958. + * rwlocks - rtmutex which allows single reader recursion
  11959. + */
  11960. +typedef struct {
  11961. + struct rt_mutex lock;
  11962. + int read_depth;
  11963. + unsigned int break_lock;
  11964. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11965. + struct lockdep_map dep_map;
  11966. +#endif
  11967. +} rwlock_t;
  11968. +
  11969. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  11970. +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  11971. +#else
  11972. +# define RW_DEP_MAP_INIT(lockname)
  11973. +#endif
  11974. +
  11975. +#define __RW_LOCK_UNLOCKED(name) \
  11976. + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \
  11977. + RW_DEP_MAP_INIT(name) }
  11978. +
  11979. +#define DEFINE_RWLOCK(name) \
  11980. + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name)
  11981. +
  11982. +#endif
  11983. diff -Nur linux-4.1.39.orig/include/linux/rwsem.h linux-4.1.39/include/linux/rwsem.h
  11984. --- linux-4.1.39.orig/include/linux/rwsem.h 2017-03-13 21:04:36.000000000 +0100
  11985. +++ linux-4.1.39/include/linux/rwsem.h 2017-04-18 17:56:30.609396976 +0200
  11986. @@ -18,6 +18,10 @@
  11987. #include <linux/osq_lock.h>
  11988. #endif
  11989. +#ifdef CONFIG_PREEMPT_RT_FULL
  11990. +#include <linux/rwsem_rt.h>
  11991. +#else /* PREEMPT_RT_FULL */
  11992. +
  11993. struct rw_semaphore;
  11994. #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
  11995. @@ -177,4 +181,6 @@
  11996. # define up_read_non_owner(sem) up_read(sem)
  11997. #endif
  11998. +#endif /* !PREEMPT_RT_FULL */
  11999. +
  12000. #endif /* _LINUX_RWSEM_H */
  12001. diff -Nur linux-4.1.39.orig/include/linux/rwsem_rt.h linux-4.1.39/include/linux/rwsem_rt.h
  12002. --- linux-4.1.39.orig/include/linux/rwsem_rt.h 1970-01-01 01:00:00.000000000 +0100
  12003. +++ linux-4.1.39/include/linux/rwsem_rt.h 2017-04-18 17:56:30.609396976 +0200
  12004. @@ -0,0 +1,140 @@
  12005. +#ifndef _LINUX_RWSEM_RT_H
  12006. +#define _LINUX_RWSEM_RT_H
  12007. +
  12008. +#ifndef _LINUX_RWSEM_H
  12009. +#error "Include rwsem.h"
  12010. +#endif
  12011. +
  12012. +/*
  12013. + * RW-semaphores are a spinlock plus a reader-depth count.
  12014. + *
  12015. + * Note that the semantics are different from the usual
  12016. + * Linux rw-sems: in PREEMPT_RT mode we do not allow
  12017. + * multiple readers to hold the lock at once; we only allow
  12018. + * a read-lock owner to read-lock recursively. This is
  12019. + * better for latency, makes the implementation inherently
  12020. + * fair and keeps it simpler as well.
  12021. + */
  12022. +
  12023. +#include <linux/rtmutex.h>
  12024. +
  12025. +struct rw_semaphore {
  12026. + struct rt_mutex lock;
  12027. + int read_depth;
  12028. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12029. + struct lockdep_map dep_map;
  12030. +#endif
  12031. +};
  12032. +
  12033. +#define __RWSEM_INITIALIZER(name) \
  12034. + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
  12035. + RW_DEP_MAP_INIT(name) }
  12036. +
  12037. +#define DECLARE_RWSEM(lockname) \
  12038. + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
  12039. +
  12040. +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  12041. + struct lock_class_key *key);
  12042. +
  12043. +#define __rt_init_rwsem(sem, name, key) \
  12044. + do { \
  12045. + rt_mutex_init(&(sem)->lock); \
  12046. + __rt_rwsem_init((sem), (name), (key));\
  12047. + } while (0)
  12048. +
  12049. +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key)
  12050. +
  12051. +# define rt_init_rwsem(sem) \
  12052. +do { \
  12053. + static struct lock_class_key __key; \
  12054. + \
  12055. + __rt_init_rwsem((sem), #sem, &__key); \
  12056. +} while (0)
  12057. +
  12058. +extern void rt_down_write(struct rw_semaphore *rwsem);
  12059. +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass);
  12060. +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass);
  12061. +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  12062. + struct lockdep_map *nest);
  12063. +extern void rt_down_read(struct rw_semaphore *rwsem);
  12064. +extern int rt_down_write_trylock(struct rw_semaphore *rwsem);
  12065. +extern int rt_down_read_trylock(struct rw_semaphore *rwsem);
  12066. +extern void __rt_up_read(struct rw_semaphore *rwsem);
  12067. +extern void rt_up_read(struct rw_semaphore *rwsem);
  12068. +extern void rt_up_write(struct rw_semaphore *rwsem);
  12069. +extern void rt_downgrade_write(struct rw_semaphore *rwsem);
  12070. +
  12071. +#define init_rwsem(sem) rt_init_rwsem(sem)
  12072. +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock)
  12073. +
  12074. +static inline int rwsem_is_contended(struct rw_semaphore *sem)
  12075. +{
  12076. + /* rt_mutex_has_waiters() */
  12077. + return !RB_EMPTY_ROOT(&sem->lock.waiters);
  12078. +}
  12079. +
  12080. +static inline void down_read(struct rw_semaphore *sem)
  12081. +{
  12082. + rt_down_read(sem);
  12083. +}
  12084. +
  12085. +static inline int down_read_trylock(struct rw_semaphore *sem)
  12086. +{
  12087. + return rt_down_read_trylock(sem);
  12088. +}
  12089. +
  12090. +static inline void down_write(struct rw_semaphore *sem)
  12091. +{
  12092. + rt_down_write(sem);
  12093. +}
  12094. +
  12095. +static inline int down_write_trylock(struct rw_semaphore *sem)
  12096. +{
  12097. + return rt_down_write_trylock(sem);
  12098. +}
  12099. +
  12100. +static inline void __up_read(struct rw_semaphore *sem)
  12101. +{
  12102. + __rt_up_read(sem);
  12103. +}
  12104. +
  12105. +static inline void up_read(struct rw_semaphore *sem)
  12106. +{
  12107. + rt_up_read(sem);
  12108. +}
  12109. +
  12110. +static inline void up_write(struct rw_semaphore *sem)
  12111. +{
  12112. + rt_up_write(sem);
  12113. +}
  12114. +
  12115. +static inline void downgrade_write(struct rw_semaphore *sem)
  12116. +{
  12117. + rt_downgrade_write(sem);
  12118. +}
  12119. +
  12120. +static inline void down_read_nested(struct rw_semaphore *sem, int subclass)
  12121. +{
  12122. + return rt_down_read_nested(sem, subclass);
  12123. +}
  12124. +
  12125. +static inline void down_write_nested(struct rw_semaphore *sem, int subclass)
  12126. +{
  12127. + rt_down_write_nested(sem, subclass);
  12128. +}
  12129. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12130. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  12131. + struct rw_semaphore *nest_lock)
  12132. +{
  12133. + rt_down_write_nested_lock(sem, &nest_lock->dep_map);
  12134. +}
  12135. +
  12136. +#else
  12137. +
  12138. +static inline void down_write_nest_lock(struct rw_semaphore *sem,
  12139. + struct rw_semaphore *nest_lock)
  12140. +{
  12141. + rt_down_write_nested_lock(sem, NULL);
  12142. +}
  12143. +#endif
  12144. +#endif
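
A minimal reader/writer sketch for the RT rw_semaphore defined in this header; note the single-reader semantics described in the comment above. The demo names are hypothetical:

#include <linux/rwsem.h>
#include <linux/list.h>

static DECLARE_RWSEM(demo_sem);
static LIST_HEAD(demo_list);

static bool demo_list_empty(void)
{
        bool empty;

        down_read(&demo_sem);           /* rt_down_read(): one reader at a time on RT */
        empty = list_empty(&demo_list);
        up_read(&demo_sem);
        return empty;
}

static void demo_list_add(struct list_head *entry)
{
        down_write(&demo_sem);          /* rt_down_write() */
        list_add_tail(entry, &demo_list);
        up_write(&demo_sem);
}
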
  12145. diff -Nur linux-4.1.39.orig/include/linux/sched.h linux-4.1.39/include/linux/sched.h
  12146. --- linux-4.1.39.orig/include/linux/sched.h 2017-03-13 21:04:36.000000000 +0100
  12147. +++ linux-4.1.39/include/linux/sched.h 2017-04-18 17:56:30.609396976 +0200
  12148. @@ -26,6 +26,7 @@
  12149. #include <linux/nodemask.h>
  12150. #include <linux/mm_types.h>
  12151. #include <linux/preempt_mask.h>
  12152. +#include <asm/kmap_types.h>
  12153. #include <asm/page.h>
  12154. #include <asm/ptrace.h>
  12155. @@ -175,8 +176,6 @@
  12156. extern void calc_global_load(unsigned long ticks);
  12157. extern void update_cpu_load_nohz(void);
  12158. -extern unsigned long get_parent_ip(unsigned long addr);
  12159. -
  12160. extern void dump_cpu_task(int cpu);
  12161. struct seq_file;
  12162. @@ -234,10 +233,7 @@
  12163. TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
  12164. __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  12165. -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
  12166. #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
  12167. -#define task_is_stopped_or_traced(task) \
  12168. - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  12169. #define task_contributes_to_load(task) \
  12170. ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
  12171. (task->flags & PF_FROZEN) == 0)
  12172. @@ -302,6 +298,11 @@
  12173. #endif
  12174. +#define __set_current_state_no_track(state_value) \
  12175. + do { current->state = (state_value); } while (0)
  12176. +#define set_current_state_no_track(state_value) \
  12177. + set_mb(current->state, (state_value))
  12178. +
  12179. /* Task command name length */
  12180. #define TASK_COMM_LEN 16
  12181. @@ -902,6 +903,50 @@
  12182. #define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
  12183. /*
  12184. + * Wake-queues are lists of tasks with a pending wakeup, whose
  12185. + * callers have already marked the task as woken internally,
  12186. + * and can thus carry on. A common use case is being able to
  12187. + * do the wakeups once the corresponding user lock has been
  12188. + * released.
  12189. + *
  12190. + * We hold reference to each task in the list across the wakeup,
  12191. + * thus guaranteeing that the memory is still valid by the time
  12192. + * the actual wakeups are performed in wake_up_q().
  12193. + *
  12194. + * One per task suffices, because there's never a need for a task to be
  12195. + * in two wake queues simultaneously; it is forbidden to abandon a task
  12196. + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
  12197. + * already in a wake queue, the wakeup will happen soon and the second
  12198. + * waker can just skip it.
  12199. + *
  12200. + * The WAKE_Q macro declares and initializes the list head.
  12201. + * wake_up_q() does NOT reinitialize the list; it's expected to be
  12202. + * called near the end of a function, where the fact that the queue is
  12203. + * not used again will be easy to see by inspection.
  12204. + *
  12205. + * Note that this can cause spurious wakeups. schedule() callers
  12206. + * must ensure the call is done inside a loop, confirming that the
  12207. + * wakeup condition has in fact occurred.
  12208. + */
  12209. +struct wake_q_node {
  12210. + struct wake_q_node *next;
  12211. +};
  12212. +
  12213. +struct wake_q_head {
  12214. + struct wake_q_node *first;
  12215. + struct wake_q_node **lastp;
  12216. +};
  12217. +
  12218. +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
  12219. +
  12220. +#define WAKE_Q(name) \
  12221. + struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
  12222. +
  12223. +extern void wake_q_add(struct wake_q_head *head,
  12224. + struct task_struct *task);
  12225. +extern void wake_up_q(struct wake_q_head *head);
  12226. +
  12227. +/*
  12228. * sched-domains (multiprocessor balancing) declarations:
  12229. */
  12230. #ifdef CONFIG_SMP
  12231. @@ -1293,6 +1338,7 @@
  12232. struct task_struct {
  12233. volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
  12234. + volatile long saved_state; /* saved state for "spinlock sleepers" */
  12235. void *stack;
  12236. atomic_t usage;
  12237. unsigned int flags; /* per process flags, defined below */
  12238. @@ -1329,6 +1375,12 @@
  12239. #endif
  12240. unsigned int policy;
  12241. +#ifdef CONFIG_PREEMPT_RT_FULL
  12242. + int migrate_disable;
  12243. +# ifdef CONFIG_SCHED_DEBUG
  12244. + int migrate_disable_atomic;
  12245. +# endif
  12246. +#endif
  12247. int nr_cpus_allowed;
  12248. cpumask_t cpus_allowed;
  12249. @@ -1436,7 +1488,8 @@
  12250. struct cputime prev_cputime;
  12251. #endif
  12252. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  12253. - seqlock_t vtime_seqlock;
  12254. + raw_spinlock_t vtime_lock;
  12255. + seqcount_t vtime_seq;
  12256. unsigned long long vtime_snap;
  12257. enum {
  12258. VTIME_SLEEPING = 0,
  12259. @@ -1452,6 +1505,9 @@
  12260. struct task_cputime cputime_expires;
  12261. struct list_head cpu_timers[3];
  12262. +#ifdef CONFIG_PREEMPT_RT_BASE
  12263. + struct task_struct *posix_timer_list;
  12264. +#endif
  12265. /* process credentials */
  12266. const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
  12267. @@ -1485,10 +1541,15 @@
  12268. /* signal handlers */
  12269. struct signal_struct *signal;
  12270. struct sighand_struct *sighand;
  12271. + struct sigqueue *sigqueue_cache;
  12272. sigset_t blocked, real_blocked;
  12273. sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
  12274. struct sigpending pending;
  12275. +#ifdef CONFIG_PREEMPT_RT_FULL
  12276. + /* TODO: move me into ->restart_block ? */
  12277. + struct siginfo forced_info;
  12278. +#endif
  12279. unsigned long sas_ss_sp;
  12280. size_t sas_ss_size;
  12281. @@ -1514,6 +1575,8 @@
  12282. /* Protection of the PI data structures: */
  12283. raw_spinlock_t pi_lock;
  12284. + struct wake_q_node wake_q;
  12285. +
  12286. #ifdef CONFIG_RT_MUTEXES
  12287. /* PI waiters blocked on a rt_mutex held by this task */
  12288. struct rb_root pi_waiters;
  12289. @@ -1708,6 +1771,12 @@
  12290. unsigned long trace;
  12291. /* bitmask and counter of trace recursion */
  12292. unsigned long trace_recursion;
  12293. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  12294. + u64 preempt_timestamp_hist;
  12295. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  12296. + long timer_offset;
  12297. +#endif
  12298. +#endif
  12299. #endif /* CONFIG_TRACING */
  12300. #ifdef CONFIG_MEMCG
  12301. struct memcg_oom_info {
  12302. @@ -1724,14 +1793,26 @@
  12303. unsigned int sequential_io;
  12304. unsigned int sequential_io_avg;
  12305. #endif
  12306. +#ifdef CONFIG_PREEMPT_RT_BASE
  12307. + struct rcu_head put_rcu;
  12308. + int softirq_nestcnt;
  12309. + unsigned int softirqs_raised;
  12310. +#endif
  12311. +#ifdef CONFIG_PREEMPT_RT_FULL
  12312. +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32
  12313. + int kmap_idx;
  12314. + pte_t kmap_pte[KM_TYPE_NR];
  12315. +# endif
  12316. +#endif
  12317. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  12318. unsigned long task_state_change;
  12319. #endif
  12320. +#ifdef CONFIG_PREEMPT_RT_FULL
  12321. + int xmit_recursion;
  12322. +#endif
  12323. + int pagefault_disabled;
  12324. };
  12325. -/* Future-safe accessor for struct task_struct's cpus_allowed. */
  12326. -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  12327. -
  12328. #define TNF_MIGRATED 0x01
  12329. #define TNF_NO_GROUP 0x02
  12330. #define TNF_SHARED 0x04
  12331. @@ -1920,6 +2001,15 @@
  12332. extern void free_task(struct task_struct *tsk);
  12333. #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
  12334. +#ifdef CONFIG_PREEMPT_RT_BASE
  12335. +extern void __put_task_struct_cb(struct rcu_head *rhp);
  12336. +
  12337. +static inline void put_task_struct(struct task_struct *t)
  12338. +{
  12339. + if (atomic_dec_and_test(&t->usage))
  12340. + call_rcu(&t->put_rcu, __put_task_struct_cb);
  12341. +}
  12342. +#else
  12343. extern void __put_task_struct(struct task_struct *t);
  12344. static inline void put_task_struct(struct task_struct *t)
  12345. @@ -1927,6 +2017,7 @@
  12346. if (atomic_dec_and_test(&t->usage))
  12347. __put_task_struct(t);
  12348. }
  12349. +#endif
  12350. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  12351. extern void task_cputime(struct task_struct *t,
  12352. @@ -1965,6 +2056,7 @@
  12353. /*
  12354. * Per process flags
  12355. */
  12356. +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */
  12357. #define PF_EXITING 0x00000004 /* getting shut down */
  12358. #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
  12359. #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
  12360. @@ -2129,6 +2221,10 @@
  12361. extern int set_cpus_allowed_ptr(struct task_struct *p,
  12362. const struct cpumask *new_mask);
  12363. +int migrate_me(void);
  12364. +void tell_sched_cpu_down_begin(int cpu);
  12365. +void tell_sched_cpu_down_done(int cpu);
  12366. +
  12367. #else
  12368. static inline void do_set_cpus_allowed(struct task_struct *p,
  12369. const struct cpumask *new_mask)
  12370. @@ -2141,6 +2237,9 @@
  12371. return -EINVAL;
  12372. return 0;
  12373. }
  12374. +static inline int migrate_me(void) { return 0; }
  12375. +static inline void tell_sched_cpu_down_begin(int cpu) { }
  12376. +static inline void tell_sched_cpu_down_done(int cpu) { }
  12377. #endif
  12378. #ifdef CONFIG_NO_HZ_COMMON
  12379. @@ -2357,6 +2456,7 @@
  12380. extern int wake_up_state(struct task_struct *tsk, unsigned int state);
  12381. extern int wake_up_process(struct task_struct *tsk);
  12382. +extern int wake_up_lock_sleeper(struct task_struct * tsk);
  12383. extern void wake_up_new_task(struct task_struct *tsk);
  12384. #ifdef CONFIG_SMP
  12385. extern void kick_process(struct task_struct *tsk);
  12386. @@ -2473,12 +2573,24 @@
  12387. /* mmdrop drops the mm and the page tables */
  12388. extern void __mmdrop(struct mm_struct *);
  12389. +
  12390. static inline void mmdrop(struct mm_struct * mm)
  12391. {
  12392. if (unlikely(atomic_dec_and_test(&mm->mm_count)))
  12393. __mmdrop(mm);
  12394. }
  12395. +#ifdef CONFIG_PREEMPT_RT_BASE
  12396. +extern void __mmdrop_delayed(struct rcu_head *rhp);
  12397. +static inline void mmdrop_delayed(struct mm_struct *mm)
  12398. +{
  12399. + if (atomic_dec_and_test(&mm->mm_count))
  12400. + call_rcu(&mm->delayed_drop, __mmdrop_delayed);
  12401. +}
  12402. +#else
  12403. +# define mmdrop_delayed(mm) mmdrop(mm)
  12404. +#endif
  12405. +
  12406. /* mmput gets rid of the mappings and all user-space */
  12407. extern void mmput(struct mm_struct *);
  12408. /* Grab a reference to a task's mm, if it is not already going away */
  12409. @@ -2790,6 +2902,43 @@
  12410. return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  12411. }
  12412. +#ifdef CONFIG_PREEMPT_LAZY
  12413. +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
  12414. +{
  12415. + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  12416. +}
  12417. +
  12418. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
  12419. +{
  12420. + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
  12421. +}
  12422. +
  12423. +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
  12424. +{
  12425. + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
  12426. +}
  12427. +
  12428. +static inline int need_resched_lazy(void)
  12429. +{
  12430. + return test_thread_flag(TIF_NEED_RESCHED_LAZY);
  12431. +}
  12432. +
  12433. +static inline int need_resched_now(void)
  12434. +{
  12435. + return test_thread_flag(TIF_NEED_RESCHED);
  12436. +}
  12437. +
  12438. +#else
  12439. +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
  12440. +static inline int need_resched_lazy(void) { return 0; }
  12441. +
  12442. +static inline int need_resched_now(void)
  12443. +{
  12444. + return test_thread_flag(TIF_NEED_RESCHED);
  12445. +}
  12446. +
  12447. +#endif
  12448. +
  12449. static inline int restart_syscall(void)
  12450. {
  12451. set_tsk_thread_flag(current, TIF_SIGPENDING);
  12452. @@ -2821,6 +2970,51 @@
  12453. return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  12454. }
  12455. +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
  12456. +{
  12457. + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
  12458. + return true;
  12459. +#ifdef CONFIG_PREEMPT_RT_FULL
  12460. + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
  12461. + return true;
  12462. +#endif
  12463. + return false;
  12464. +}
  12465. +
  12466. +static inline bool task_is_stopped_or_traced(struct task_struct *task)
  12467. +{
  12468. + bool traced_stopped;
  12469. +
  12470. +#ifdef CONFIG_PREEMPT_RT_FULL
  12471. + unsigned long flags;
  12472. +
  12473. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  12474. + traced_stopped = __task_is_stopped_or_traced(task);
  12475. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  12476. +#else
  12477. + traced_stopped = __task_is_stopped_or_traced(task);
  12478. +#endif
  12479. + return traced_stopped;
  12480. +}
  12481. +
  12482. +static inline bool task_is_traced(struct task_struct *task)
  12483. +{
  12484. + bool traced = false;
  12485. +
  12486. + if (task->state & __TASK_TRACED)
  12487. + return true;
  12488. +#ifdef CONFIG_PREEMPT_RT_FULL
  12489. + /* in case the task is sleeping on tasklist_lock */
  12490. + raw_spin_lock_irq(&task->pi_lock);
  12491. + if (task->state & __TASK_TRACED)
  12492. + traced = true;
  12493. + else if (task->saved_state & __TASK_TRACED)
  12494. + traced = true;
  12495. + raw_spin_unlock_irq(&task->pi_lock);
  12496. +#endif
  12497. + return traced;
  12498. +}
  12499. +
  12500. /*
  12501. * cond_resched() and cond_resched_lock(): latency reduction via
  12502. * explicit rescheduling in places that are safe. The return
  12503. @@ -2842,12 +3036,16 @@
  12504. __cond_resched_lock(lock); \
  12505. })
  12506. +#ifndef CONFIG_PREEMPT_RT_FULL
  12507. extern int __cond_resched_softirq(void);
  12508. #define cond_resched_softirq() ({ \
  12509. ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
  12510. __cond_resched_softirq(); \
  12511. })
  12512. +#else
  12513. +# define cond_resched_softirq() cond_resched()
  12514. +#endif
  12515. static inline void cond_resched_rcu(void)
  12516. {
  12517. @@ -3014,6 +3212,26 @@
  12518. #endif /* CONFIG_SMP */
  12519. +static inline int __migrate_disabled(struct task_struct *p)
  12520. +{
  12521. +#ifdef CONFIG_PREEMPT_RT_FULL
  12522. + return p->migrate_disable;
  12523. +#else
  12524. + return 0;
  12525. +#endif
  12526. +}
  12527. +
  12528. +/* Future-safe accessor for struct task_struct's cpus_allowed. */
  12529. +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p)
  12530. +{
  12531. +#ifdef CONFIG_PREEMPT_RT_FULL
  12532. + if (p->migrate_disable)
  12533. + return cpumask_of(task_cpu(p));
  12534. +#endif
  12535. +
  12536. + return &p->cpus_allowed;
  12537. +}
  12538. +
  12539. extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
  12540. extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
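
A sketch of the wake-queue helpers added to sched.h above, deferring wakeups until a waiter lock has been dropped; struct demo_waiter and the demo_* names are illustrative only:

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>

struct demo_waiter {
        struct list_head node;
        struct task_struct *task;
};

static void demo_wake_all(spinlock_t *lock, struct list_head *waiters)
{
        struct demo_waiter *w;
        WAKE_Q(wq);                     /* declares and initializes the head */

        spin_lock(lock);
        list_for_each_entry(w, waiters, node)
                wake_q_add(&wq, w->task);
        spin_unlock(lock);

        /* Wakeups are issued only after the lock has been released. */
        wake_up_q(&wq);
}
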
  12541. diff -Nur linux-4.1.39.orig/include/linux/seqlock.h linux-4.1.39/include/linux/seqlock.h
  12542. --- linux-4.1.39.orig/include/linux/seqlock.h 2017-03-13 21:04:36.000000000 +0100
  12543. +++ linux-4.1.39/include/linux/seqlock.h 2017-04-18 17:56:30.609396976 +0200
  12544. @@ -219,20 +219,30 @@
  12545. return __read_seqcount_retry(s, start);
  12546. }
  12547. -
  12548. -
  12549. -static inline void raw_write_seqcount_begin(seqcount_t *s)
  12550. +static inline void __raw_write_seqcount_begin(seqcount_t *s)
  12551. {
  12552. s->sequence++;
  12553. smp_wmb();
  12554. }
  12555. -static inline void raw_write_seqcount_end(seqcount_t *s)
  12556. +static inline void raw_write_seqcount_begin(seqcount_t *s)
  12557. +{
  12558. + preempt_disable_rt();
  12559. + __raw_write_seqcount_begin(s);
  12560. +}
  12561. +
  12562. +static inline void __raw_write_seqcount_end(seqcount_t *s)
  12563. {
  12564. smp_wmb();
  12565. s->sequence++;
  12566. }
  12567. +static inline void raw_write_seqcount_end(seqcount_t *s)
  12568. +{
  12569. + __raw_write_seqcount_end(s);
  12570. + preempt_enable_rt();
  12571. +}
  12572. +
  12573. /*
  12574. * raw_write_seqcount_latch - redirect readers to even/odd copy
  12575. * @s: pointer to seqcount_t
  12576. @@ -305,10 +315,32 @@
  12577. /*
  12578. * Read side functions for starting and finalizing a read side section.
  12579. */
  12580. +#ifndef CONFIG_PREEMPT_RT_FULL
  12581. static inline unsigned read_seqbegin(const seqlock_t *sl)
  12582. {
  12583. return read_seqcount_begin(&sl->seqcount);
  12584. }
  12585. +#else
  12586. +/*
  12587. + * Starvation safe read side for RT
  12588. + */
  12589. +static inline unsigned read_seqbegin(seqlock_t *sl)
  12590. +{
  12591. + unsigned ret;
  12592. +
  12593. +repeat:
  12594. + ret = ACCESS_ONCE(sl->seqcount.sequence);
  12595. + if (unlikely(ret & 1)) {
  12596. + /*
  12597. + * Take the lock and let the writer proceed (i.e. possibly
  12598. + * boost it), otherwise we could loop here forever.
  12599. + */
  12600. + spin_unlock_wait(&sl->lock);
  12601. + goto repeat;
  12602. + }
  12603. + return ret;
  12604. +}
  12605. +#endif
  12606. static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
  12607. {
  12608. @@ -323,36 +355,36 @@
  12609. static inline void write_seqlock(seqlock_t *sl)
  12610. {
  12611. spin_lock(&sl->lock);
  12612. - write_seqcount_begin(&sl->seqcount);
  12613. + __raw_write_seqcount_begin(&sl->seqcount);
  12614. }
  12615. static inline void write_sequnlock(seqlock_t *sl)
  12616. {
  12617. - write_seqcount_end(&sl->seqcount);
  12618. + __raw_write_seqcount_end(&sl->seqcount);
  12619. spin_unlock(&sl->lock);
  12620. }
  12621. static inline void write_seqlock_bh(seqlock_t *sl)
  12622. {
  12623. spin_lock_bh(&sl->lock);
  12624. - write_seqcount_begin(&sl->seqcount);
  12625. + __raw_write_seqcount_begin(&sl->seqcount);
  12626. }
  12627. static inline void write_sequnlock_bh(seqlock_t *sl)
  12628. {
  12629. - write_seqcount_end(&sl->seqcount);
  12630. + __raw_write_seqcount_end(&sl->seqcount);
  12631. spin_unlock_bh(&sl->lock);
  12632. }
  12633. static inline void write_seqlock_irq(seqlock_t *sl)
  12634. {
  12635. spin_lock_irq(&sl->lock);
  12636. - write_seqcount_begin(&sl->seqcount);
  12637. + __raw_write_seqcount_begin(&sl->seqcount);
  12638. }
  12639. static inline void write_sequnlock_irq(seqlock_t *sl)
  12640. {
  12641. - write_seqcount_end(&sl->seqcount);
  12642. + __raw_write_seqcount_end(&sl->seqcount);
  12643. spin_unlock_irq(&sl->lock);
  12644. }
  12645. @@ -361,7 +393,7 @@
  12646. unsigned long flags;
  12647. spin_lock_irqsave(&sl->lock, flags);
  12648. - write_seqcount_begin(&sl->seqcount);
  12649. + __raw_write_seqcount_begin(&sl->seqcount);
  12650. return flags;
  12651. }
  12652. @@ -371,7 +403,7 @@
  12653. static inline void
  12654. write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
  12655. {
  12656. - write_seqcount_end(&sl->seqcount);
  12657. + __raw_write_seqcount_end(&sl->seqcount);
  12658. spin_unlock_irqrestore(&sl->lock, flags);
  12659. }
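
A classic writer/reader pair over the reworked seqlock above; on RT the read side waits on the lock (via spin_unlock_wait()) instead of spinning against a preempted writer. The demo names are hypothetical:

#include <linux/seqlock.h>

static DEFINE_SEQLOCK(demo_seqlock);
static u64 demo_a, demo_b;

static void demo_store(u64 a, u64 b)
{
        write_seqlock(&demo_seqlock);   /* __raw_write_seqcount_begin() inside */
        demo_a = a;
        demo_b = b;
        write_sequnlock(&demo_seqlock);
}

static u64 demo_sum(void)
{
        unsigned int seq;
        u64 sum;

        do {
                seq = read_seqbegin(&demo_seqlock);
                sum = demo_a + demo_b;
        } while (read_seqretry(&demo_seqlock, seq));

        return sum;
}
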
  12660. diff -Nur linux-4.1.39.orig/include/linux/signal.h linux-4.1.39/include/linux/signal.h
  12661. --- linux-4.1.39.orig/include/linux/signal.h 2017-03-13 21:04:36.000000000 +0100
  12662. +++ linux-4.1.39/include/linux/signal.h 2017-04-18 17:56:30.609396976 +0200
  12663. @@ -233,6 +233,7 @@
  12664. }
  12665. extern void flush_sigqueue(struct sigpending *queue);
  12666. +extern void flush_task_sigqueue(struct task_struct *tsk);
  12667. /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
  12668. static inline int valid_signal(unsigned long sig)
  12669. diff -Nur linux-4.1.39.orig/include/linux/skbuff.h linux-4.1.39/include/linux/skbuff.h
  12670. --- linux-4.1.39.orig/include/linux/skbuff.h 2017-03-13 21:04:36.000000000 +0100
  12671. +++ linux-4.1.39/include/linux/skbuff.h 2017-04-18 17:56:30.613397130 +0200
  12672. @@ -187,6 +187,7 @@
  12673. __u32 qlen;
  12674. spinlock_t lock;
  12675. + raw_spinlock_t raw_lock;
  12676. };
  12677. struct sk_buff;
  12678. @@ -1337,6 +1338,12 @@
  12679. __skb_queue_head_init(list);
  12680. }
  12681. +static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
  12682. +{
  12683. + raw_spin_lock_init(&list->raw_lock);
  12684. + __skb_queue_head_init(list);
  12685. +}
  12686. +
  12687. static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  12688. struct lock_class_key *class)
  12689. {
  12690. diff -Nur linux-4.1.39.orig/include/linux/smp.h linux-4.1.39/include/linux/smp.h
  12691. --- linux-4.1.39.orig/include/linux/smp.h 2017-03-13 21:04:36.000000000 +0100
  12692. +++ linux-4.1.39/include/linux/smp.h 2017-04-18 17:56:30.613397130 +0200
  12693. @@ -185,6 +185,9 @@
  12694. #define get_cpu() ({ preempt_disable(); smp_processor_id(); })
  12695. #define put_cpu() preempt_enable()
  12696. +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
  12697. +#define put_cpu_light() migrate_enable()
  12698. +
  12699. /*
  12700. * Callback to arch code if there's nosmp or maxcpus=0 on the
  12701. * boot command line:
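
A small sketch contrasting get_cpu_light() with get_cpu(): it only disables migration, so the section stays preemptible on RT. The demo helper is hypothetical:

#include <linux/smp.h>
#include <linux/printk.h>

static void demo_note_cpu(const char *what)
{
        int cpu;

        cpu = get_cpu_light();          /* migrate_disable(), not preempt_disable() */
        pr_debug("%s running on CPU %d\n", what, cpu);
        put_cpu_light();                /* migrate_enable() */
}
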
  12702. diff -Nur linux-4.1.39.orig/include/linux/spinlock_api_smp.h linux-4.1.39/include/linux/spinlock_api_smp.h
  12703. --- linux-4.1.39.orig/include/linux/spinlock_api_smp.h 2017-03-13 21:04:36.000000000 +0100
  12704. +++ linux-4.1.39/include/linux/spinlock_api_smp.h 2017-04-18 17:56:30.613397130 +0200
  12705. @@ -189,6 +189,8 @@
  12706. return 0;
  12707. }
  12708. -#include <linux/rwlock_api_smp.h>
  12709. +#ifndef CONFIG_PREEMPT_RT_FULL
  12710. +# include <linux/rwlock_api_smp.h>
  12711. +#endif
  12712. #endif /* __LINUX_SPINLOCK_API_SMP_H */
  12713. diff -Nur linux-4.1.39.orig/include/linux/spinlock.h linux-4.1.39/include/linux/spinlock.h
  12714. --- linux-4.1.39.orig/include/linux/spinlock.h 2017-03-13 21:04:36.000000000 +0100
  12715. +++ linux-4.1.39/include/linux/spinlock.h 2017-04-18 17:56:30.613397130 +0200
  12716. @@ -281,7 +281,11 @@
  12717. #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock))
  12718. /* Include rwlock functions */
  12719. -#include <linux/rwlock.h>
  12720. +#ifdef CONFIG_PREEMPT_RT_FULL
  12721. +# include <linux/rwlock_rt.h>
  12722. +#else
  12723. +# include <linux/rwlock.h>
  12724. +#endif
  12725. /*
  12726. * Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
  12727. @@ -292,6 +296,10 @@
  12728. # include <linux/spinlock_api_up.h>
  12729. #endif
  12730. +#ifdef CONFIG_PREEMPT_RT_FULL
  12731. +# include <linux/spinlock_rt.h>
  12732. +#else /* PREEMPT_RT_FULL */
  12733. +
  12734. /*
  12735. * Map the spin_lock functions to the raw variants for PREEMPT_RT=n
  12736. */
  12737. @@ -426,4 +434,6 @@
  12738. #define atomic_dec_and_lock(atomic, lock) \
  12739. __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
  12740. +#endif /* !PREEMPT_RT_FULL */
  12741. +
  12742. #endif /* __LINUX_SPINLOCK_H */
  12743. diff -Nur linux-4.1.39.orig/include/linux/spinlock_rt.h linux-4.1.39/include/linux/spinlock_rt.h
  12744. --- linux-4.1.39.orig/include/linux/spinlock_rt.h 1970-01-01 01:00:00.000000000 +0100
  12745. +++ linux-4.1.39/include/linux/spinlock_rt.h 2017-04-18 17:56:30.613397130 +0200
  12746. @@ -0,0 +1,174 @@
  12747. +#ifndef __LINUX_SPINLOCK_RT_H
  12748. +#define __LINUX_SPINLOCK_RT_H
  12749. +
  12750. +#ifndef __LINUX_SPINLOCK_H
  12751. +#error Do not include directly. Use spinlock.h
  12752. +#endif
  12753. +
  12754. +#include <linux/bug.h>
  12755. +
  12756. +extern void
  12757. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key);
  12758. +
  12759. +#define spin_lock_init(slock) \
  12760. +do { \
  12761. + static struct lock_class_key __key; \
  12762. + \
  12763. + rt_mutex_init(&(slock)->lock); \
  12764. + __rt_spin_lock_init(slock, #slock, &__key); \
  12765. +} while (0)
  12766. +
  12767. +extern void __lockfunc rt_spin_lock(spinlock_t *lock);
  12768. +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
  12769. +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
  12770. +extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
  12771. +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
  12772. +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
  12773. +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
  12774. +extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
  12775. +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock);
  12776. +
  12777. +/*
  12778. + * lockdep-less calls, for derived types like rwlock:
  12779. + * (for trylock they can use rt_mutex_trylock() directly).
  12780. + */
  12781. +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock);
  12782. +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock);
  12783. +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock);
  12784. +
  12785. +#define spin_lock(lock) \
  12786. + do { \
  12787. + migrate_disable(); \
  12788. + rt_spin_lock(lock); \
  12789. + } while (0)
  12790. +
  12791. +#define spin_lock_bh(lock) \
  12792. + do { \
  12793. + local_bh_disable(); \
  12794. + migrate_disable(); \
  12795. + rt_spin_lock(lock); \
  12796. + } while (0)
  12797. +
  12798. +#define spin_lock_irq(lock) spin_lock(lock)
  12799. +
  12800. +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock))
  12801. +
  12802. +#define spin_trylock(lock) \
  12803. +({ \
  12804. + int __locked; \
  12805. + migrate_disable(); \
  12806. + __locked = spin_do_trylock(lock); \
  12807. + if (!__locked) \
  12808. + migrate_enable(); \
  12809. + __locked; \
  12810. +})
  12811. +
  12812. +#ifdef CONFIG_LOCKDEP
  12813. +# define spin_lock_nested(lock, subclass) \
  12814. + do { \
  12815. + migrate_disable(); \
  12816. + rt_spin_lock_nested(lock, subclass); \
  12817. + } while (0)
  12818. +
  12819. +#define spin_lock_bh_nested(lock, subclass) \
  12820. + do { \
  12821. + local_bh_disable(); \
  12822. + migrate_disable(); \
  12823. + rt_spin_lock_nested(lock, subclass); \
  12824. + } while (0)
  12825. +
  12826. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  12827. + do { \
  12828. + typecheck(unsigned long, flags); \
  12829. + flags = 0; \
  12830. + migrate_disable(); \
  12831. + rt_spin_lock_nested(lock, subclass); \
  12832. + } while (0)
  12833. +#else
  12834. +# define spin_lock_nested(lock, subclass) spin_lock(lock)
  12835. +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
  12836. +
  12837. +# define spin_lock_irqsave_nested(lock, flags, subclass) \
  12838. + do { \
  12839. + typecheck(unsigned long, flags); \
  12840. + flags = 0; \
  12841. + spin_lock(lock); \
  12842. + } while (0)
  12843. +#endif
  12844. +
  12845. +#define spin_lock_irqsave(lock, flags) \
  12846. + do { \
  12847. + typecheck(unsigned long, flags); \
  12848. + flags = 0; \
  12849. + spin_lock(lock); \
  12850. + } while (0)
  12851. +
  12852. +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
  12853. +{
  12854. + unsigned long flags = 0;
  12855. +#ifdef CONFIG_TRACE_IRQFLAGS
  12856. + flags = rt_spin_lock_trace_flags(lock);
  12857. +#else
  12858. + spin_lock(lock); /* lock_local */
  12859. +#endif
  12860. + return flags;
  12861. +}
  12862. +
  12863. +/* FIXME: we need rt_spin_lock_nest_lock */
  12864. +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
  12865. +
  12866. +#define spin_unlock(lock) \
  12867. + do { \
  12868. + rt_spin_unlock(lock); \
  12869. + migrate_enable(); \
  12870. + } while (0)
  12871. +
  12872. +#define spin_unlock_bh(lock) \
  12873. + do { \
  12874. + rt_spin_unlock(lock); \
  12875. + migrate_enable(); \
  12876. + local_bh_enable(); \
  12877. + } while (0)
  12878. +
  12879. +#define spin_unlock_irq(lock) spin_unlock(lock)
  12880. +
  12881. +#define spin_unlock_irqrestore(lock, flags) \
  12882. + do { \
  12883. + typecheck(unsigned long, flags); \
  12884. + (void) flags; \
  12885. + spin_unlock(lock); \
  12886. + } while (0)
  12887. +
  12888. +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
  12889. +#define spin_trylock_irq(lock) spin_trylock(lock)
  12890. +
  12891. +#define spin_trylock_irqsave(lock, flags) \
  12892. + rt_spin_trylock_irqsave(lock, &(flags))
  12893. +
  12894. +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
  12895. +
  12896. +#ifdef CONFIG_GENERIC_LOCKBREAK
  12897. +# define spin_is_contended(lock) ((lock)->break_lock)
  12898. +#else
  12899. +# define spin_is_contended(lock) (((void)(lock), 0))
  12900. +#endif
  12901. +
  12902. +static inline int spin_can_lock(spinlock_t *lock)
  12903. +{
  12904. + return !rt_mutex_is_locked(&lock->lock);
  12905. +}
  12906. +
  12907. +static inline int spin_is_locked(spinlock_t *lock)
  12908. +{
  12909. + return rt_mutex_is_locked(&lock->lock);
  12910. +}
  12911. +
  12912. +static inline void assert_spin_locked(spinlock_t *lock)
  12913. +{
  12914. + BUG_ON(!spin_is_locked(lock));
  12915. +}
  12916. +
  12917. +#define atomic_dec_and_lock(atomic, lock) \
  12918. + atomic_dec_and_spin_lock(atomic, lock)
  12919. +
  12920. +#endif
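
A usage sketch of the sleeping spinlock mapping from this header: the calling convention (including irqsave flags) is unchanged, but the lock is an rt_mutex and interrupts stay enabled. The demo names are hypothetical:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static unsigned long demo_events;

static void demo_count_event(void)
{
        unsigned long flags;

        /* flags is set to 0 on RT; kept so shared code compiles unchanged */
        spin_lock_irqsave(&demo_lock, flags);
        demo_events++;
        spin_unlock_irqrestore(&demo_lock, flags);
}
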
  12921. diff -Nur linux-4.1.39.orig/include/linux/spinlock_types.h linux-4.1.39/include/linux/spinlock_types.h
  12922. --- linux-4.1.39.orig/include/linux/spinlock_types.h 2017-03-13 21:04:36.000000000 +0100
  12923. +++ linux-4.1.39/include/linux/spinlock_types.h 2017-04-18 17:56:30.613397130 +0200
  12924. @@ -9,80 +9,15 @@
  12925. * Released under the General Public License (GPL).
  12926. */
  12927. -#if defined(CONFIG_SMP)
  12928. -# include <asm/spinlock_types.h>
  12929. -#else
  12930. -# include <linux/spinlock_types_up.h>
  12931. -#endif
  12932. -
  12933. -#include <linux/lockdep.h>
  12934. -
  12935. -typedef struct raw_spinlock {
  12936. - arch_spinlock_t raw_lock;
  12937. -#ifdef CONFIG_GENERIC_LOCKBREAK
  12938. - unsigned int break_lock;
  12939. -#endif
  12940. -#ifdef CONFIG_DEBUG_SPINLOCK
  12941. - unsigned int magic, owner_cpu;
  12942. - void *owner;
  12943. -#endif
  12944. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12945. - struct lockdep_map dep_map;
  12946. -#endif
  12947. -} raw_spinlock_t;
  12948. -
  12949. -#define SPINLOCK_MAGIC 0xdead4ead
  12950. -
  12951. -#define SPINLOCK_OWNER_INIT ((void *)-1L)
  12952. -
  12953. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12954. -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  12955. -#else
  12956. -# define SPIN_DEP_MAP_INIT(lockname)
  12957. -#endif
  12958. +#include <linux/spinlock_types_raw.h>
  12959. -#ifdef CONFIG_DEBUG_SPINLOCK
  12960. -# define SPIN_DEBUG_INIT(lockname) \
  12961. - .magic = SPINLOCK_MAGIC, \
  12962. - .owner_cpu = -1, \
  12963. - .owner = SPINLOCK_OWNER_INIT,
  12964. +#ifndef CONFIG_PREEMPT_RT_FULL
  12965. +# include <linux/spinlock_types_nort.h>
  12966. +# include <linux/rwlock_types.h>
  12967. #else
  12968. -# define SPIN_DEBUG_INIT(lockname)
  12969. +# include <linux/rtmutex.h>
  12970. +# include <linux/spinlock_types_rt.h>
  12971. +# include <linux/rwlock_types_rt.h>
  12972. #endif
  12973. -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  12974. - { \
  12975. - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  12976. - SPIN_DEBUG_INIT(lockname) \
  12977. - SPIN_DEP_MAP_INIT(lockname) }
  12978. -
  12979. -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  12980. - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  12981. -
  12982. -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  12983. -
  12984. -typedef struct spinlock {
  12985. - union {
  12986. - struct raw_spinlock rlock;
  12987. -
  12988. -#ifdef CONFIG_DEBUG_LOCK_ALLOC
  12989. -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  12990. - struct {
  12991. - u8 __padding[LOCK_PADSIZE];
  12992. - struct lockdep_map dep_map;
  12993. - };
  12994. -#endif
  12995. - };
  12996. -} spinlock_t;
  12997. -
  12998. -#define __SPIN_LOCK_INITIALIZER(lockname) \
  12999. - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  13000. -
  13001. -#define __SPIN_LOCK_UNLOCKED(lockname) \
  13002. - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  13003. -
  13004. -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  13005. -
  13006. -#include <linux/rwlock_types.h>
  13007. -
  13008. #endif /* __LINUX_SPINLOCK_TYPES_H */
  13009. diff -Nur linux-4.1.39.orig/include/linux/spinlock_types_nort.h linux-4.1.39/include/linux/spinlock_types_nort.h
  13010. --- linux-4.1.39.orig/include/linux/spinlock_types_nort.h 1970-01-01 01:00:00.000000000 +0100
  13011. +++ linux-4.1.39/include/linux/spinlock_types_nort.h 2017-04-18 17:56:30.613397130 +0200
  13012. @@ -0,0 +1,33 @@
  13013. +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
  13014. +#define __LINUX_SPINLOCK_TYPES_NORT_H
  13015. +
  13016. +#ifndef __LINUX_SPINLOCK_TYPES_H
  13017. +#error "Do not include directly. Include spinlock_types.h instead"
  13018. +#endif
  13019. +
  13020. +/*
  13021. + * The non RT version maps spinlocks to raw_spinlocks
  13022. + */
  13023. +typedef struct spinlock {
  13024. + union {
  13025. + struct raw_spinlock rlock;
  13026. +
  13027. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13028. +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
  13029. + struct {
  13030. + u8 __padding[LOCK_PADSIZE];
  13031. + struct lockdep_map dep_map;
  13032. + };
  13033. +#endif
  13034. + };
  13035. +} spinlock_t;
  13036. +
  13037. +#define __SPIN_LOCK_INITIALIZER(lockname) \
  13038. + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
  13039. +
  13040. +#define __SPIN_LOCK_UNLOCKED(lockname) \
  13041. + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
  13042. +
  13043. +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
  13044. +
  13045. +#endif
  13046. diff -Nur linux-4.1.39.orig/include/linux/spinlock_types_raw.h linux-4.1.39/include/linux/spinlock_types_raw.h
  13047. --- linux-4.1.39.orig/include/linux/spinlock_types_raw.h 1970-01-01 01:00:00.000000000 +0100
  13048. +++ linux-4.1.39/include/linux/spinlock_types_raw.h 2017-04-18 17:56:30.613397130 +0200
  13049. @@ -0,0 +1,56 @@
  13050. +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
  13051. +#define __LINUX_SPINLOCK_TYPES_RAW_H
  13052. +
  13053. +#if defined(CONFIG_SMP)
  13054. +# include <asm/spinlock_types.h>
  13055. +#else
  13056. +# include <linux/spinlock_types_up.h>
  13057. +#endif
  13058. +
  13059. +#include <linux/lockdep.h>
  13060. +
  13061. +typedef struct raw_spinlock {
  13062. + arch_spinlock_t raw_lock;
  13063. +#ifdef CONFIG_GENERIC_LOCKBREAK
  13064. + unsigned int break_lock;
  13065. +#endif
  13066. +#ifdef CONFIG_DEBUG_SPINLOCK
  13067. + unsigned int magic, owner_cpu;
  13068. + void *owner;
  13069. +#endif
  13070. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13071. + struct lockdep_map dep_map;
  13072. +#endif
  13073. +} raw_spinlock_t;
  13074. +
  13075. +#define SPINLOCK_MAGIC 0xdead4ead
  13076. +
  13077. +#define SPINLOCK_OWNER_INIT ((void *)-1L)
  13078. +
  13079. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13080. +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
  13081. +#else
  13082. +# define SPIN_DEP_MAP_INIT(lockname)
  13083. +#endif
  13084. +
  13085. +#ifdef CONFIG_DEBUG_SPINLOCK
  13086. +# define SPIN_DEBUG_INIT(lockname) \
  13087. + .magic = SPINLOCK_MAGIC, \
  13088. + .owner_cpu = -1, \
  13089. + .owner = SPINLOCK_OWNER_INIT,
  13090. +#else
  13091. +# define SPIN_DEBUG_INIT(lockname)
  13092. +#endif
  13093. +
  13094. +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
  13095. + { \
  13096. + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
  13097. + SPIN_DEBUG_INIT(lockname) \
  13098. + SPIN_DEP_MAP_INIT(lockname) }
  13099. +
  13100. +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
  13101. + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
  13102. +
  13103. +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
  13104. +
  13105. +#endif
  13106. diff -Nur linux-4.1.39.orig/include/linux/spinlock_types_rt.h linux-4.1.39/include/linux/spinlock_types_rt.h
  13107. --- linux-4.1.39.orig/include/linux/spinlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100
  13108. +++ linux-4.1.39/include/linux/spinlock_types_rt.h 2017-04-18 17:56:30.613397130 +0200
  13109. @@ -0,0 +1,51 @@
  13110. +#ifndef __LINUX_SPINLOCK_TYPES_RT_H
  13111. +#define __LINUX_SPINLOCK_TYPES_RT_H
  13112. +
  13113. +#ifndef __LINUX_SPINLOCK_TYPES_H
  13114. +#error "Do not include directly. Include spinlock_types.h instead"
  13115. +#endif
  13116. +
  13117. +#include <linux/cache.h>
  13118. +
  13119. +/*
  13120. + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field:
  13121. + */
  13122. +typedef struct spinlock {
  13123. + struct rt_mutex lock;
  13124. + unsigned int break_lock;
  13125. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  13126. + struct lockdep_map dep_map;
  13127. +#endif
  13128. +} spinlock_t;
  13129. +
  13130. +#ifdef CONFIG_DEBUG_RT_MUTEXES
  13131. +# define __RT_SPIN_INITIALIZER(name) \
  13132. + { \
  13133. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  13134. + .save_state = 1, \
  13135. + .file = __FILE__, \
  13136. + .line = __LINE__ , \
  13137. + }
  13138. +#else
  13139. +# define __RT_SPIN_INITIALIZER(name) \
  13140. + { \
  13141. + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
  13142. + .save_state = 1, \
  13143. + }
  13144. +#endif
  13145. +
  13146. +/*
  13147. +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
  13148. +*/
  13149. +
  13150. +#define __SPIN_LOCK_UNLOCKED(name) \
  13151. + { .lock = __RT_SPIN_INITIALIZER(name.lock), \
  13152. + SPIN_DEP_MAP_INIT(name) }
  13153. +
  13154. +#define __DEFINE_SPINLOCK(name) \
  13155. + spinlock_t name = __SPIN_LOCK_UNLOCKED(name)
  13156. +
  13157. +#define DEFINE_SPINLOCK(name) \
  13158. + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name)
  13159. +
  13160. +#endif
  13161. diff -Nur linux-4.1.39.orig/include/linux/srcu.h linux-4.1.39/include/linux/srcu.h
  13162. --- linux-4.1.39.orig/include/linux/srcu.h 2017-03-13 21:04:36.000000000 +0100
  13163. +++ linux-4.1.39/include/linux/srcu.h 2017-04-18 17:56:30.613397130 +0200
  13164. @@ -84,10 +84,10 @@
  13165. void process_srcu(struct work_struct *work);
  13166. -#define __SRCU_STRUCT_INIT(name) \
  13167. +#define __SRCU_STRUCT_INIT(name, pcpu_name) \
  13168. { \
  13169. .completed = -300, \
  13170. - .per_cpu_ref = &name##_srcu_array, \
  13171. + .per_cpu_ref = &pcpu_name, \
  13172. .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
  13173. .running = false, \
  13174. .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
  13175. @@ -104,7 +104,7 @@
  13176. */
  13177. #define __DEFINE_SRCU(name, is_static) \
  13178. static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
  13179. - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
  13180. + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array)
  13181. #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
  13182. #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
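Illustrative sketch, not part of the patch: how the DEFINE_SRCU()/DEFINE_STATIC_SRCU() wrappers touched above are normally consumed. The per-CPU array is now passed explicitly as pcpu_name, but callers keep using just the srcu_struct name; the identifiers below are made up for the example.

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(example_srcu);	/* expands through __DEFINE_SRCU(name, static) */

static void example_reader(void)
{
	int idx = srcu_read_lock(&example_srcu);
	/* dereference SRCU-protected data here */
	srcu_read_unlock(&example_srcu, idx);
}

static void example_updater(void)
{
	/* publish new data first, then wait for pre-existing readers to finish */
	synchronize_srcu(&example_srcu);
}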
  13183. diff -Nur linux-4.1.39.orig/include/linux/swap.h linux-4.1.39/include/linux/swap.h
  13184. --- linux-4.1.39.orig/include/linux/swap.h 2017-03-13 21:04:36.000000000 +0100
  13185. +++ linux-4.1.39/include/linux/swap.h 2017-04-18 17:56:30.613397130 +0200
  13186. @@ -11,6 +11,7 @@
  13187. #include <linux/fs.h>
  13188. #include <linux/atomic.h>
  13189. #include <linux/page-flags.h>
  13190. +#include <linux/locallock.h>
  13191. #include <asm/page.h>
  13192. struct notifier_block;
  13193. @@ -252,7 +253,8 @@
  13194. void *workingset_eviction(struct address_space *mapping, struct page *page);
  13195. bool workingset_refault(void *shadow);
  13196. void workingset_activation(struct page *page);
  13197. -extern struct list_lru workingset_shadow_nodes;
  13198. +extern struct list_lru __workingset_shadow_nodes;
  13199. +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  13200. static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
  13201. {
  13202. @@ -296,6 +298,7 @@
  13203. /* linux/mm/swap.c */
  13204. +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock);
  13205. extern void lru_cache_add(struct page *);
  13206. extern void lru_cache_add_anon(struct page *page);
  13207. extern void lru_cache_add_file(struct page *page);
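Illustrative sketch, not part of the patch: the local-IRQ-lock pattern behind the DECLARE_LOCAL_IRQ_LOCK() lines added above. The matching DEFINE_LOCAL_IRQ_LOCK() lives in a .c file; on PREEMPT_RT the "lock" is a per-CPU sleeping lock, on !RT it falls back to plain local_irq_save(). The lock and function names below are made up.

#include <linux/locallock.h>

DEFINE_LOCAL_IRQ_LOCK(example_pvec_lock);

static void example_touch_percpu_state(void)
{
	unsigned long flags;

	local_lock_irqsave(example_pvec_lock, flags);
	/* manipulate this CPU's pagevec or statistics safely here */
	local_unlock_irqrestore(example_pvec_lock, flags);
}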
  13208. diff -Nur linux-4.1.39.orig/include/linux/thread_info.h linux-4.1.39/include/linux/thread_info.h
  13209. --- linux-4.1.39.orig/include/linux/thread_info.h 2017-03-13 21:04:36.000000000 +0100
  13210. +++ linux-4.1.39/include/linux/thread_info.h 2017-04-18 17:56:30.613397130 +0200
  13211. @@ -102,7 +102,17 @@
  13212. #define test_thread_flag(flag) \
  13213. test_ti_thread_flag(current_thread_info(), flag)
  13214. -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  13215. +#ifdef CONFIG_PREEMPT_LAZY
  13216. +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
  13217. + test_thread_flag(TIF_NEED_RESCHED_LAZY))
  13218. +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
  13219. +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY))
  13220. +
  13221. +#else
  13222. +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
  13223. +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
  13224. +#define tif_need_resched_lazy() 0
  13225. +#endif
  13226. #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
  13227. /*
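Illustrative sketch, not part of the patch: how the split resched helpers above are typically consumed. tif_need_resched() folds both bits together, so existing cond_resched()-style loops keep working, while tif_need_resched_now()/tif_need_resched_lazy() let RT-aware code tell an urgent preemption request from a deferrable one. The loop below is made up.

#include <linux/thread_info.h>
#include <linux/sched.h>

static void example_poll_loop(bool (*work_pending)(void), void (*do_work)(void))
{
	while (work_pending()) {
		do_work();
		/* true for TIF_NEED_RESCHED and, with PREEMPT_LAZY,
		 * for TIF_NEED_RESCHED_LAZY as well */
		if (tif_need_resched())
			cond_resched();
	}
}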
  13228. diff -Nur linux-4.1.39.orig/include/linux/timer.h linux-4.1.39/include/linux/timer.h
  13229. --- linux-4.1.39.orig/include/linux/timer.h 2017-03-13 21:04:36.000000000 +0100
  13230. +++ linux-4.1.39/include/linux/timer.h 2017-04-18 17:56:30.613397130 +0200
  13231. @@ -241,7 +241,7 @@
  13232. extern int try_to_del_timer_sync(struct timer_list *timer);
  13233. -#ifdef CONFIG_SMP
  13234. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  13235. extern int del_timer_sync(struct timer_list *timer);
  13236. #else
  13237. # define del_timer_sync(t) del_timer(t)
  13238. diff -Nur linux-4.1.39.orig/include/linux/uaccess.h linux-4.1.39/include/linux/uaccess.h
  13239. --- linux-4.1.39.orig/include/linux/uaccess.h 2017-03-13 21:04:36.000000000 +0100
  13240. +++ linux-4.1.39/include/linux/uaccess.h 2017-04-18 17:56:30.613397130 +0200
  13241. @@ -1,21 +1,31 @@
  13242. #ifndef __LINUX_UACCESS_H__
  13243. #define __LINUX_UACCESS_H__
  13244. -#include <linux/preempt.h>
  13245. +#include <linux/sched.h>
  13246. #include <asm/uaccess.h>
  13247. +static __always_inline void pagefault_disabled_inc(void)
  13248. +{
  13249. + current->pagefault_disabled++;
  13250. +}
  13251. +
  13252. +static __always_inline void pagefault_disabled_dec(void)
  13253. +{
  13254. + current->pagefault_disabled--;
  13255. + WARN_ON(current->pagefault_disabled < 0);
  13256. +}
  13257. +
  13258. /*
  13259. - * These routines enable/disable the pagefault handler in that
  13260. - * it will not take any locks and go straight to the fixup table.
  13261. + * These routines enable/disable the pagefault handler. If disabled, it will
  13262. + * not take any locks and go straight to the fixup table.
  13263. *
  13264. - * They have great resemblance to the preempt_disable/enable calls
  13265. - * and in fact they are identical; this is because currently there is
  13266. - * no other way to make the pagefault handlers do this. So we do
  13267. - * disable preemption but we don't necessarily care about that.
  13268. + * User access methods will not sleep when called from a pagefault_disabled()
  13269. + * environment.
  13270. */
  13271. static inline void pagefault_disable(void)
  13272. {
  13273. - preempt_count_inc();
  13274. + migrate_disable();
  13275. + pagefault_disabled_inc();
  13276. /*
  13277. * make sure to have issued the store before a pagefault
  13278. * can hit.
  13279. @@ -25,18 +35,32 @@
  13280. static inline void pagefault_enable(void)
  13281. {
  13282. -#ifndef CONFIG_PREEMPT
  13283. /*
  13284. * make sure to issue those last loads/stores before enabling
  13285. * the pagefault handler again.
  13286. */
  13287. barrier();
  13288. - preempt_count_dec();
  13289. -#else
  13290. - preempt_enable();
  13291. -#endif
  13292. + pagefault_disabled_dec();
  13293. + migrate_enable();
  13294. }
  13295. +/*
  13296. + * Is the pagefault handler disabled? If so, user access methods will not sleep.
  13297. + */
  13298. +#define pagefault_disabled() (current->pagefault_disabled != 0)
  13299. +
  13300. +/*
  13301. + * The pagefault handler is in general disabled by pagefault_disable() or
  13302. + * when in irq context (via in_atomic()).
  13303. + *
  13304. + * This function should only be used by the fault handlers. Other users should
  13305. + * stick to pagefault_disabled().
  13306. + * Please NEVER use preempt_disable() to disable the fault handler. With
  13307. + * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled.
  13308. + * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT.
  13309. + */
  13310. +#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
  13311. +
  13312. #ifndef ARCH_HAS_NOCACHE_UACCESS
  13313. static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
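Illustrative sketch, not part of the patch: the usage pattern the comments above describe. Inside a pagefault_disable() section user accesses must not sleep, so a failed atomic copy is retried with a normal, sleeping copy_from_user() once faults are allowed again. The function and parameter names are made up.

#include <linux/uaccess.h>

static int example_copy_nofault(void *dst, const void __user *src, size_t len)
{
	unsigned long left;

	pagefault_disable();		/* faulthandler_disabled() is now true */
	left = __copy_from_user_inatomic(dst, src, len);
	pagefault_enable();

	if (!left)
		return 0;

	/* slow path: may fault and sleep, must not hold spinlocks here */
	return copy_from_user(dst, src, len) ? -EFAULT : 0;
}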
  13314. diff -Nur linux-4.1.39.orig/include/linux/uprobes.h linux-4.1.39/include/linux/uprobes.h
  13315. --- linux-4.1.39.orig/include/linux/uprobes.h 2017-03-13 21:04:36.000000000 +0100
  13316. +++ linux-4.1.39/include/linux/uprobes.h 2017-04-18 17:56:30.613397130 +0200
  13317. @@ -27,6 +27,7 @@
  13318. #include <linux/errno.h>
  13319. #include <linux/rbtree.h>
  13320. #include <linux/types.h>
  13321. +#include <linux/wait.h>
  13322. struct vm_area_struct;
  13323. struct mm_struct;
  13324. diff -Nur linux-4.1.39.orig/include/linux/vmstat.h linux-4.1.39/include/linux/vmstat.h
  13325. --- linux-4.1.39.orig/include/linux/vmstat.h 2017-03-13 21:04:36.000000000 +0100
  13326. +++ linux-4.1.39/include/linux/vmstat.h 2017-04-18 17:56:30.613397130 +0200
  13327. @@ -33,7 +33,9 @@
  13328. */
  13329. static inline void __count_vm_event(enum vm_event_item item)
  13330. {
  13331. + preempt_disable_rt();
  13332. raw_cpu_inc(vm_event_states.event[item]);
  13333. + preempt_enable_rt();
  13334. }
  13335. static inline void count_vm_event(enum vm_event_item item)
  13336. @@ -43,7 +45,9 @@
  13337. static inline void __count_vm_events(enum vm_event_item item, long delta)
  13338. {
  13339. + preempt_disable_rt();
  13340. raw_cpu_add(vm_event_states.event[item], delta);
  13341. + preempt_enable_rt();
  13342. }
  13343. static inline void count_vm_events(enum vm_event_item item, long delta)
  13344. diff -Nur linux-4.1.39.orig/include/linux/wait.h linux-4.1.39/include/linux/wait.h
  13345. --- linux-4.1.39.orig/include/linux/wait.h 2017-03-13 21:04:36.000000000 +0100
  13346. +++ linux-4.1.39/include/linux/wait.h 2017-04-18 17:56:30.613397130 +0200
  13347. @@ -8,6 +8,7 @@
  13348. #include <linux/spinlock.h>
  13349. #include <asm/current.h>
  13350. #include <uapi/linux/wait.h>
  13351. +#include <linux/atomic.h>
  13352. typedef struct __wait_queue wait_queue_t;
  13353. typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
  13354. diff -Nur linux-4.1.39.orig/include/linux/wait-simple.h linux-4.1.39/include/linux/wait-simple.h
  13355. --- linux-4.1.39.orig/include/linux/wait-simple.h 1970-01-01 01:00:00.000000000 +0100
  13356. +++ linux-4.1.39/include/linux/wait-simple.h 2017-04-18 17:56:30.613397130 +0200
  13357. @@ -0,0 +1,207 @@
  13358. +#ifndef _LINUX_WAIT_SIMPLE_H
  13359. +#define _LINUX_WAIT_SIMPLE_H
  13360. +
  13361. +#include <linux/spinlock.h>
  13362. +#include <linux/list.h>
  13363. +
  13364. +#include <asm/current.h>
  13365. +
  13366. +struct swaiter {
  13367. + struct task_struct *task;
  13368. + struct list_head node;
  13369. +};
  13370. +
  13371. +#define DEFINE_SWAITER(name) \
  13372. + struct swaiter name = { \
  13373. + .task = current, \
  13374. + .node = LIST_HEAD_INIT((name).node), \
  13375. + }
  13376. +
  13377. +struct swait_head {
  13378. + raw_spinlock_t lock;
  13379. + struct list_head list;
  13380. +};
  13381. +
  13382. +#define SWAIT_HEAD_INITIALIZER(name) { \
  13383. + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
  13384. + .list = LIST_HEAD_INIT((name).list), \
  13385. + }
  13386. +
  13387. +#define DEFINE_SWAIT_HEAD(name) \
  13388. + struct swait_head name = SWAIT_HEAD_INITIALIZER(name)
  13389. +
  13390. +extern void __init_swait_head(struct swait_head *h, struct lock_class_key *key);
  13391. +
  13392. +#define init_swait_head(swh) \
  13393. + do { \
  13394. + static struct lock_class_key __key; \
  13395. + \
  13396. + __init_swait_head((swh), &__key); \
  13397. + } while (0)
  13398. +
  13399. +/*
  13400. + * Waiter functions
  13401. + */
  13402. +extern void swait_prepare_locked(struct swait_head *head, struct swaiter *w);
  13403. +extern void swait_prepare(struct swait_head *head, struct swaiter *w, int state);
  13404. +extern void swait_finish_locked(struct swait_head *head, struct swaiter *w);
  13405. +extern void swait_finish(struct swait_head *head, struct swaiter *w);
  13406. +
  13407. +/* Check whether a head has waiters enqueued */
  13408. +static inline bool swaitqueue_active(struct swait_head *h)
  13409. +{
  13410. + /* Make sure the condition is visible before checking list_empty() */
  13411. + smp_mb();
  13412. + return !list_empty(&h->list);
  13413. +}
  13414. +
  13415. +/*
  13416. + * Wakeup functions
  13417. + */
  13418. +extern unsigned int __swait_wake(struct swait_head *head, unsigned int state, unsigned int num);
  13419. +extern unsigned int __swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num);
  13420. +
  13421. +#define swait_wake(head) __swait_wake(head, TASK_NORMAL, 1)
  13422. +#define swait_wake_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 1)
  13423. +#define swait_wake_all(head) __swait_wake(head, TASK_NORMAL, 0)
  13424. +#define swait_wake_all_interruptible(head) __swait_wake(head, TASK_INTERRUPTIBLE, 0)
  13425. +
  13426. +/*
  13427. + * Event API
  13428. + */
  13429. +#define __swait_event(wq, condition) \
  13430. +do { \
  13431. + DEFINE_SWAITER(__wait); \
  13432. + \
  13433. + for (;;) { \
  13434. + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
  13435. + if (condition) \
  13436. + break; \
  13437. + schedule(); \
  13438. + } \
  13439. + swait_finish(&wq, &__wait); \
  13440. +} while (0)
  13441. +
  13442. +/**
  13443. + * swait_event - sleep until a condition gets true
  13444. + * @wq: the waitqueue to wait on
  13445. + * @condition: a C expression for the event to wait for
  13446. + *
  13447. + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  13448. + * @condition evaluates to true. The @condition is checked each time
  13449. + * the waitqueue @wq is woken up.
  13450. + *
  13451. + * wake_up() has to be called after changing any variable that could
  13452. + * change the result of the wait condition.
  13453. + */
  13454. +#define swait_event(wq, condition) \
  13455. +do { \
  13456. + if (condition) \
  13457. + break; \
  13458. + __swait_event(wq, condition); \
  13459. +} while (0)
  13460. +
  13461. +#define __swait_event_interruptible(wq, condition, ret) \
  13462. +do { \
  13463. + DEFINE_SWAITER(__wait); \
  13464. + \
  13465. + for (;;) { \
  13466. + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
  13467. + if (condition) \
  13468. + break; \
  13469. + if (signal_pending(current)) { \
  13470. + ret = -ERESTARTSYS; \
  13471. + break; \
  13472. + } \
  13473. + schedule(); \
  13474. + } \
  13475. + swait_finish(&wq, &__wait); \
  13476. +} while (0)
  13477. +
  13478. +#define __swait_event_interruptible_timeout(wq, condition, ret) \
  13479. +do { \
  13480. + DEFINE_SWAITER(__wait); \
  13481. + \
  13482. + for (;;) { \
  13483. + swait_prepare(&wq, &__wait, TASK_INTERRUPTIBLE); \
  13484. + if (condition) \
  13485. + break; \
  13486. + if (signal_pending(current)) { \
  13487. + ret = -ERESTARTSYS; \
  13488. + break; \
  13489. + } \
  13490. + ret = schedule_timeout(ret); \
  13491. + if (!ret) \
  13492. + break; \
  13493. + } \
  13494. + swait_finish(&wq, &__wait); \
  13495. +} while (0)
  13496. +
  13497. +/**
  13498. + * swait_event_interruptible - sleep until a condition gets true
  13499. + * @wq: the waitqueue to wait on
  13500. + * @condition: a C expression for the event to wait for
  13501. + *
  13502. + * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  13503. + * @condition evaluates to true. The @condition is checked each time
  13504. + * the waitqueue @wq is woken up.
  13505. + *
  13506. + * wake_up() has to be called after changing any variable that could
  13507. + * change the result of the wait condition.
  13508. + */
  13509. +#define swait_event_interruptible(wq, condition) \
  13510. +({ \
  13511. + int __ret = 0; \
  13512. + if (!(condition)) \
  13513. + __swait_event_interruptible(wq, condition, __ret); \
  13514. + __ret; \
  13515. +})
  13516. +
  13517. +#define swait_event_interruptible_timeout(wq, condition, timeout) \
  13518. +({ \
  13519. + int __ret = timeout; \
  13520. + if (!(condition)) \
  13521. + __swait_event_interruptible_timeout(wq, condition, __ret); \
  13522. + __ret; \
  13523. +})
  13524. +
  13525. +#define __swait_event_timeout(wq, condition, ret) \
  13526. +do { \
  13527. + DEFINE_SWAITER(__wait); \
  13528. + \
  13529. + for (;;) { \
  13530. + swait_prepare(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
  13531. + if (condition) \
  13532. + break; \
  13533. + ret = schedule_timeout(ret); \
  13534. + if (!ret) \
  13535. + break; \
  13536. + } \
  13537. + swait_finish(&wq, &__wait); \
  13538. +} while (0)
  13539. +
  13540. +/**
  13541. + * swait_event_timeout - sleep until a condition gets true or a timeout elapses
  13542. + * @wq: the waitqueue to wait on
  13543. + * @condition: a C expression for the event to wait for
  13544. + * @timeout: timeout, in jiffies
  13545. + *
  13546. + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
  13547. + * @condition evaluates to true. The @condition is checked each time
  13548. + * the waitqueue @wq is woken up.
  13549. + *
  13550. + * wake_up() has to be called after changing any variable that could
  13551. + * change the result of the wait condition.
  13552. + *
  13553. + * The function returns 0 if the @timeout elapsed, and the remaining
  13554. + * jiffies if the condition evaluated to true before the timeout elapsed.
  13555. + */
  13556. +#define swait_event_timeout(wq, condition, timeout) \
  13557. +({ \
  13558. + long __ret = timeout; \
  13559. + if (!(condition)) \
  13560. + __swait_event_timeout(wq, condition, __ret); \
  13561. + __ret; \
  13562. +})
  13563. +
  13564. +#endif
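Illustrative sketch, not part of the patch: a typical producer/consumer use of the simple-wait API defined above. As the kerneldoc notes, the condition has to be updated before the wakeup; the names below are made up.

#include <linux/wait-simple.h>

static DEFINE_SWAIT_HEAD(example_wait);
static bool example_ready;

static int example_consumer(void)
{
	/* sleeps in TASK_INTERRUPTIBLE until example_ready becomes true */
	return swait_event_interruptible(example_wait, example_ready);
}

static void example_producer(void)
{
	example_ready = true;				/* change the condition first ... */
	swait_wake_interruptible(&example_wait);	/* ... then wake one waiter */
}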
  13565. diff -Nur linux-4.1.39.orig/include/linux/work-simple.h linux-4.1.39/include/linux/work-simple.h
  13566. --- linux-4.1.39.orig/include/linux/work-simple.h 1970-01-01 01:00:00.000000000 +0100
  13567. +++ linux-4.1.39/include/linux/work-simple.h 2017-04-18 17:56:30.613397130 +0200
  13568. @@ -0,0 +1,24 @@
  13569. +#ifndef _LINUX_SWORK_H
  13570. +#define _LINUX_SWORK_H
  13571. +
  13572. +#include <linux/list.h>
  13573. +
  13574. +struct swork_event {
  13575. + struct list_head item;
  13576. + unsigned long flags;
  13577. + void (*func)(struct swork_event *);
  13578. +};
  13579. +
  13580. +static inline void INIT_SWORK(struct swork_event *event,
  13581. + void (*func)(struct swork_event *))
  13582. +{
  13583. + event->flags = 0;
  13584. + event->func = func;
  13585. +}
  13586. +
  13587. +bool swork_queue(struct swork_event *sev);
  13588. +
  13589. +int swork_get(void);
  13590. +void swork_put(void);
  13591. +
  13592. +#endif /* _LINUX_SWORK_H */
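Illustrative sketch, not part of the patch: the intended life cycle of the simple work API above. A user takes a reference on the worker thread with swork_get(), queues events from contexts that must stay non-sleeping, and drops the reference with swork_put(). All names below are made up.

#include <linux/work-simple.h>

static struct swork_event example_event;

static void example_event_fn(struct swork_event *sev)
{
	/* runs later in the sworker kernel thread, in preemptible context */
}

static int example_init(void)
{
	int err = swork_get();		/* create/reference the worker thread */

	if (err)
		return err;
	INIT_SWORK(&example_event, example_event_fn);
	swork_queue(&example_event);
	return 0;
}

static void example_exit(void)
{
	swork_put();			/* release the worker thread */
}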
  13593. diff -Nur linux-4.1.39.orig/include/net/dst.h linux-4.1.39/include/net/dst.h
  13594. --- linux-4.1.39.orig/include/net/dst.h 2017-03-13 21:04:36.000000000 +0100
  13595. +++ linux-4.1.39/include/net/dst.h 2017-04-18 17:56:30.613397130 +0200
  13596. @@ -436,7 +436,7 @@
  13597. static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
  13598. struct sk_buff *skb)
  13599. {
  13600. - const struct hh_cache *hh;
  13601. + struct hh_cache *hh;
  13602. if (dst->pending_confirm) {
  13603. unsigned long now = jiffies;
  13604. diff -Nur linux-4.1.39.orig/include/net/neighbour.h linux-4.1.39/include/net/neighbour.h
  13605. --- linux-4.1.39.orig/include/net/neighbour.h 2017-03-13 21:04:36.000000000 +0100
  13606. +++ linux-4.1.39/include/net/neighbour.h 2017-04-18 17:56:30.613397130 +0200
  13607. @@ -445,7 +445,7 @@
  13608. }
  13609. #endif
  13610. -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
  13611. +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
  13612. {
  13613. unsigned int seq;
  13614. int hh_len;
  13615. @@ -500,7 +500,7 @@
  13616. #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
  13617. -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
  13618. +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
  13619. const struct net_device *dev)
  13620. {
  13621. unsigned int seq;
  13622. diff -Nur linux-4.1.39.orig/include/net/netns/ipv4.h linux-4.1.39/include/net/netns/ipv4.h
  13623. --- linux-4.1.39.orig/include/net/netns/ipv4.h 2017-03-13 21:04:36.000000000 +0100
  13624. +++ linux-4.1.39/include/net/netns/ipv4.h 2017-04-18 17:56:30.613397130 +0200
  13625. @@ -69,6 +69,7 @@
  13626. int sysctl_icmp_echo_ignore_all;
  13627. int sysctl_icmp_echo_ignore_broadcasts;
  13628. + int sysctl_icmp_echo_sysrq;
  13629. int sysctl_icmp_ignore_bogus_error_responses;
  13630. int sysctl_icmp_ratelimit;
  13631. int sysctl_icmp_ratemask;
  13632. diff -Nur linux-4.1.39.orig/include/trace/events/hist.h linux-4.1.39/include/trace/events/hist.h
  13633. --- linux-4.1.39.orig/include/trace/events/hist.h 1970-01-01 01:00:00.000000000 +0100
  13634. +++ linux-4.1.39/include/trace/events/hist.h 2017-04-18 17:56:30.613397130 +0200
  13635. @@ -0,0 +1,74 @@
  13636. +#undef TRACE_SYSTEM
  13637. +#define TRACE_SYSTEM hist
  13638. +
  13639. +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ)
  13640. +#define _TRACE_HIST_H
  13641. +
  13642. +#include "latency_hist.h"
  13643. +#include <linux/tracepoint.h>
  13644. +
  13645. +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST)
  13646. +#define trace_preemptirqsoff_hist(a, b)
  13647. +#define trace_preemptirqsoff_hist_rcuidle(a, b)
  13648. +#else
  13649. +TRACE_EVENT(preemptirqsoff_hist,
  13650. +
  13651. + TP_PROTO(int reason, int starthist),
  13652. +
  13653. + TP_ARGS(reason, starthist),
  13654. +
  13655. + TP_STRUCT__entry(
  13656. + __field(int, reason)
  13657. + __field(int, starthist)
  13658. + ),
  13659. +
  13660. + TP_fast_assign(
  13661. + __entry->reason = reason;
  13662. + __entry->starthist = starthist;
  13663. + ),
  13664. +
  13665. + TP_printk("reason=%s starthist=%s", getaction(__entry->reason),
  13666. + __entry->starthist ? "start" : "stop")
  13667. +);
  13668. +#endif
  13669. +
  13670. +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST
  13671. +#define trace_hrtimer_interrupt(a, b, c, d)
  13672. +#define trace_hrtimer_interrupt_rcuidle(a, b, c, d)
  13673. +#else
  13674. +TRACE_EVENT(hrtimer_interrupt,
  13675. +
  13676. + TP_PROTO(int cpu, long long offset, struct task_struct *curr,
  13677. + struct task_struct *task),
  13678. +
  13679. + TP_ARGS(cpu, offset, curr, task),
  13680. +
  13681. + TP_STRUCT__entry(
  13682. + __field(int, cpu)
  13683. + __field(long long, offset)
  13684. + __array(char, ccomm, TASK_COMM_LEN)
  13685. + __field(int, cprio)
  13686. + __array(char, tcomm, TASK_COMM_LEN)
  13687. + __field(int, tprio)
  13688. + ),
  13689. +
  13690. + TP_fast_assign(
  13691. + __entry->cpu = cpu;
  13692. + __entry->offset = offset;
  13693. + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN);
  13694. + __entry->cprio = curr->prio;
  13695. + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>",
  13696. + task != NULL ? TASK_COMM_LEN : 7);
  13697. + __entry->tprio = task != NULL ? task->prio : -1;
  13698. + ),
  13699. +
  13700. + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]",
  13701. + __entry->cpu, __entry->offset, __entry->ccomm,
  13702. + __entry->cprio, __entry->tcomm, __entry->tprio)
  13703. +);
  13704. +#endif
  13705. +
  13706. +#endif /* _TRACE_HIST_H */
  13707. +
  13708. +/* This part must be outside protection */
  13709. +#include <trace/define_trace.h>
  13710. diff -Nur linux-4.1.39.orig/include/trace/events/latency_hist.h linux-4.1.39/include/trace/events/latency_hist.h
  13711. --- linux-4.1.39.orig/include/trace/events/latency_hist.h 1970-01-01 01:00:00.000000000 +0100
  13712. +++ linux-4.1.39/include/trace/events/latency_hist.h 2017-04-18 17:56:30.613397130 +0200
  13713. @@ -0,0 +1,29 @@
  13714. +#ifndef _LATENCY_HIST_H
  13715. +#define _LATENCY_HIST_H
  13716. +
  13717. +enum hist_action {
  13718. + IRQS_ON,
  13719. + PREEMPT_ON,
  13720. + TRACE_STOP,
  13721. + IRQS_OFF,
  13722. + PREEMPT_OFF,
  13723. + TRACE_START,
  13724. +};
  13725. +
  13726. +static char *actions[] = {
  13727. + "IRQS_ON",
  13728. + "PREEMPT_ON",
  13729. + "TRACE_STOP",
  13730. + "IRQS_OFF",
  13731. + "PREEMPT_OFF",
  13732. + "TRACE_START",
  13733. +};
  13734. +
  13735. +static inline char *getaction(int action)
  13736. +{
  13737. + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0]))
  13738. + return actions[action];
  13739. + return "unknown";
  13740. +}
  13741. +
  13742. +#endif /* _LATENCY_HIST_H */
  13743. diff -Nur linux-4.1.39.orig/include/trace/events/sched.h linux-4.1.39/include/trace/events/sched.h
  13744. --- linux-4.1.39.orig/include/trace/events/sched.h 2017-03-13 21:04:36.000000000 +0100
  13745. +++ linux-4.1.39/include/trace/events/sched.h 2017-04-18 17:56:30.613397130 +0200
  13746. @@ -55,9 +55,9 @@
  13747. */
  13748. DECLARE_EVENT_CLASS(sched_wakeup_template,
  13749. - TP_PROTO(struct task_struct *p, int success),
  13750. + TP_PROTO(struct task_struct *p),
  13751. - TP_ARGS(__perf_task(p), success),
  13752. + TP_ARGS(__perf_task(p)),
  13753. TP_STRUCT__entry(
  13754. __array( char, comm, TASK_COMM_LEN )
  13755. @@ -71,25 +71,37 @@
  13756. memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
  13757. __entry->pid = p->pid;
  13758. __entry->prio = p->prio;
  13759. - __entry->success = success;
  13760. + __entry->success = 1; /* rudiment, kill when possible */
  13761. __entry->target_cpu = task_cpu(p);
  13762. ),
  13763. - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
  13764. + TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
  13765. __entry->comm, __entry->pid, __entry->prio,
  13766. - __entry->success, __entry->target_cpu)
  13767. + __entry->target_cpu)
  13768. );
  13769. +/*
  13770. + * Tracepoint called when waking a task; this tracepoint is guaranteed to be
  13771. + * called from the waking context.
  13772. + */
  13773. +DEFINE_EVENT(sched_wakeup_template, sched_waking,
  13774. + TP_PROTO(struct task_struct *p),
  13775. + TP_ARGS(p));
  13776. +
  13777. +/*
  13778. + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
  13779. + * It is not always called from the waking context.
  13780. + */
  13781. DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
  13782. - TP_PROTO(struct task_struct *p, int success),
  13783. - TP_ARGS(p, success));
  13784. + TP_PROTO(struct task_struct *p),
  13785. + TP_ARGS(p));
  13786. /*
  13787. * Tracepoint for waking up a new task:
  13788. */
  13789. DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
  13790. - TP_PROTO(struct task_struct *p, int success),
  13791. - TP_ARGS(p, success));
  13792. + TP_PROTO(struct task_struct *p),
  13793. + TP_ARGS(p));
  13794. #ifdef CREATE_TRACE_POINTS
  13795. static inline long __trace_sched_switch_state(struct task_struct *p)
  13796. diff -Nur linux-4.1.39.orig/init/Kconfig linux-4.1.39/init/Kconfig
  13797. --- linux-4.1.39.orig/init/Kconfig 2017-03-13 21:04:36.000000000 +0100
  13798. +++ linux-4.1.39/init/Kconfig 2017-04-18 17:56:30.613397130 +0200
  13799. @@ -637,7 +637,7 @@
  13800. config RCU_FAST_NO_HZ
  13801. bool "Accelerate last non-dyntick-idle CPU's grace periods"
  13802. - depends on NO_HZ_COMMON && SMP
  13803. + depends on NO_HZ_COMMON && SMP && !PREEMPT_RT_FULL
  13804. default n
  13805. help
  13806. This option permits CPUs to enter dynticks-idle state even if
  13807. @@ -664,7 +664,7 @@
  13808. config RCU_BOOST
  13809. bool "Enable RCU priority boosting"
  13810. depends on RT_MUTEXES && PREEMPT_RCU
  13811. - default n
  13812. + default y if PREEMPT_RT_FULL
  13813. help
  13814. This option boosts the priority of preempted RCU readers that
  13815. block the current preemptible RCU grace period for too long.
  13816. @@ -1101,6 +1101,7 @@
  13817. config RT_GROUP_SCHED
  13818. bool "Group scheduling for SCHED_RR/FIFO"
  13819. depends on CGROUP_SCHED
  13820. + depends on !PREEMPT_RT_FULL
  13821. default n
  13822. help
  13823. This feature lets you explicitly allocate real CPU bandwidth
  13824. @@ -1688,6 +1689,7 @@
  13825. config SLAB
  13826. bool "SLAB"
  13827. + depends on !PREEMPT_RT_FULL
  13828. help
  13829. The regular slab allocator that is established and known to work
  13830. well in all environments. It organizes cache hot objects in
  13831. @@ -1706,6 +1708,7 @@
  13832. config SLOB
  13833. depends on EXPERT
  13834. bool "SLOB (Simple Allocator)"
  13835. + depends on !PREEMPT_RT_FULL
  13836. help
  13837. SLOB replaces the stock allocator with a drastically simpler
  13838. allocator. SLOB is generally more space efficient but
  13839. @@ -1715,7 +1718,7 @@
  13840. config SLUB_CPU_PARTIAL
  13841. default y
  13842. - depends on SLUB && SMP
  13843. + depends on SLUB && SMP && !PREEMPT_RT_FULL
  13844. bool "SLUB per cpu partial cache"
  13845. help
  13846. Per cpu partial caches accellerate objects allocation and freeing
  13847. diff -Nur linux-4.1.39.orig/init/main.c linux-4.1.39/init/main.c
  13848. --- linux-4.1.39.orig/init/main.c 2017-03-13 21:04:36.000000000 +0100
  13849. +++ linux-4.1.39/init/main.c 2017-04-18 17:56:30.617397286 +0200
  13850. @@ -525,6 +525,7 @@
  13851. setup_command_line(command_line);
  13852. setup_nr_cpu_ids();
  13853. setup_per_cpu_areas();
  13854. + softirq_early_init();
  13855. smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
  13856. build_all_zonelists(NULL, NULL);
  13857. diff -Nur linux-4.1.39.orig/init/Makefile linux-4.1.39/init/Makefile
  13858. --- linux-4.1.39.orig/init/Makefile 2017-03-13 21:04:36.000000000 +0100
  13859. +++ linux-4.1.39/init/Makefile 2017-04-18 17:56:30.613397130 +0200
  13860. @@ -33,4 +33,4 @@
  13861. include/generated/compile.h: FORCE
  13862. @$($(quiet)chk_compile.h)
  13863. $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
  13864. - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
  13865. + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
  13866. diff -Nur linux-4.1.39.orig/ipc/mqueue.c linux-4.1.39/ipc/mqueue.c
  13867. --- linux-4.1.39.orig/ipc/mqueue.c 2017-03-13 21:04:36.000000000 +0100
  13868. +++ linux-4.1.39/ipc/mqueue.c 2017-04-18 17:56:30.617397286 +0200
  13869. @@ -47,8 +47,7 @@
  13870. #define RECV 1
  13871. #define STATE_NONE 0
  13872. -#define STATE_PENDING 1
  13873. -#define STATE_READY 2
  13874. +#define STATE_READY 1
  13875. struct posix_msg_tree_node {
  13876. struct rb_node rb_node;
  13877. @@ -568,15 +567,12 @@
  13878. wq_add(info, sr, ewp);
  13879. for (;;) {
  13880. - set_current_state(TASK_INTERRUPTIBLE);
  13881. + __set_current_state(TASK_INTERRUPTIBLE);
  13882. spin_unlock(&info->lock);
  13883. time = schedule_hrtimeout_range_clock(timeout, 0,
  13884. HRTIMER_MODE_ABS, CLOCK_REALTIME);
  13885. - while (ewp->state == STATE_PENDING)
  13886. - cpu_relax();
  13887. -
  13888. if (ewp->state == STATE_READY) {
  13889. retval = 0;
  13890. goto out;
  13891. @@ -904,11 +900,15 @@
  13892. * list of waiting receivers. A sender checks that list before adding the new
  13893. * message into the message array. If there is a waiting receiver, then it
  13894. * bypasses the message array and directly hands the message over to the
  13895. - * receiver.
  13896. - * The receiver accepts the message and returns without grabbing the queue
  13897. - * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
  13898. - * are necessary. The same algorithm is used for sysv semaphores, see
  13899. - * ipc/sem.c for more details.
  13900. + * receiver. The receiver accepts the message and returns without grabbing the
  13901. + * queue spinlock:
  13902. + *
  13903. + * - Set pointer to message.
  13904. + * - Queue the receiver task for later wakeup (without the info->lock).
  13905. + * - Update its state to STATE_READY. Now the receiver can continue.
  13906. + * - Wake up the process after the lock is dropped. Should the process wake up
  13907. + * before this wakeup (due to a timeout or a signal) it will either see
  13908. + * STATE_READY and continue or acquire the lock to check the state again.
  13909. *
  13910. * The same algorithm is used for senders.
  13911. */
  13912. @@ -916,21 +916,29 @@
  13913. /* pipelined_send() - send a message directly to the task waiting in
  13914. * sys_mq_timedreceive() (without inserting message into a queue).
  13915. */
  13916. -static inline void pipelined_send(struct mqueue_inode_info *info,
  13917. +static inline void pipelined_send(struct wake_q_head *wake_q,
  13918. + struct mqueue_inode_info *info,
  13919. struct msg_msg *message,
  13920. struct ext_wait_queue *receiver)
  13921. {
  13922. receiver->msg = message;
  13923. list_del(&receiver->list);
  13924. - receiver->state = STATE_PENDING;
  13925. - wake_up_process(receiver->task);
  13926. - smp_wmb();
  13927. + wake_q_add(wake_q, receiver->task);
  13928. + /*
  13929. + * Rely on the implicit cmpxchg barrier from wake_q_add to
  13930. + * ensure that updating receiver->state is the last write
  13931. + * operation: once it is set, the receiver can continue. If we
  13932. + * did not yet hold the task reference taken by wake_q_add at
  13933. + * that point, a use-after-free and a bogus wakeup could be
  13934. + * the result.
  13935. + */
  13936. receiver->state = STATE_READY;
  13937. }
  13938. /* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
  13939. * gets its message and put to the queue (we have one free place for sure). */
  13940. -static inline void pipelined_receive(struct mqueue_inode_info *info)
  13941. +static inline void pipelined_receive(struct wake_q_head *wake_q,
  13942. + struct mqueue_inode_info *info)
  13943. {
  13944. struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
  13945. @@ -941,10 +949,9 @@
  13946. }
  13947. if (msg_insert(sender->msg, info))
  13948. return;
  13949. +
  13950. list_del(&sender->list);
  13951. - sender->state = STATE_PENDING;
  13952. - wake_up_process(sender->task);
  13953. - smp_wmb();
  13954. + wake_q_add(wake_q, sender->task);
  13955. sender->state = STATE_READY;
  13956. }
  13957. @@ -962,6 +969,7 @@
  13958. struct timespec ts;
  13959. struct posix_msg_tree_node *new_leaf = NULL;
  13960. int ret = 0;
  13961. + WAKE_Q(wake_q);
  13962. if (u_abs_timeout) {
  13963. int res = prepare_timeout(u_abs_timeout, &expires, &ts);
  13964. @@ -1045,7 +1053,7 @@
  13965. } else {
  13966. receiver = wq_get_first_waiter(info, RECV);
  13967. if (receiver) {
  13968. - pipelined_send(info, msg_ptr, receiver);
  13969. + pipelined_send(&wake_q, info, msg_ptr, receiver);
  13970. } else {
  13971. /* adds message to the queue */
  13972. ret = msg_insert(msg_ptr, info);
  13973. @@ -1058,6 +1066,7 @@
  13974. }
  13975. out_unlock:
  13976. spin_unlock(&info->lock);
  13977. + wake_up_q(&wake_q);
  13978. out_free:
  13979. if (ret)
  13980. free_msg(msg_ptr);
  13981. @@ -1144,14 +1153,17 @@
  13982. msg_ptr = wait.msg;
  13983. }
  13984. } else {
  13985. + WAKE_Q(wake_q);
  13986. +
  13987. msg_ptr = msg_get(info);
  13988. inode->i_atime = inode->i_mtime = inode->i_ctime =
  13989. CURRENT_TIME;
  13990. /* There is now free space in queue. */
  13991. - pipelined_receive(info);
  13992. + pipelined_receive(&wake_q, info);
  13993. spin_unlock(&info->lock);
  13994. + wake_up_q(&wake_q);
  13995. ret = 0;
  13996. }
  13997. if (ret == 0) {
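Illustrative sketch, not part of the patch: the lock-drop wakeup pattern that pipelined_send()/pipelined_receive() follow after this change. wake_q_add() only records the task (taking a reference); the real wake_up_process() happens in wake_up_q() once the spinlock has been released. The lock and task names are made up.

#include <linux/sched.h>
#include <linux/spinlock.h>

static void example_handover(spinlock_t *lock, struct task_struct *waiter)
{
	WAKE_Q(wake_q);

	spin_lock(lock);
	/* hand the data to the waiter and mark it STATE_READY here */
	wake_q_add(&wake_q, waiter);	/* reference taken, no wakeup yet */
	spin_unlock(lock);

	wake_up_q(&wake_q);		/* actual wakeup, lock already dropped */
}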
  13998. diff -Nur linux-4.1.39.orig/ipc/msg.c linux-4.1.39/ipc/msg.c
  13999. --- linux-4.1.39.orig/ipc/msg.c 2017-03-13 21:04:36.000000000 +0100
  14000. +++ linux-4.1.39/ipc/msg.c 2017-04-18 17:56:30.617397286 +0200
  14001. @@ -188,6 +188,12 @@
  14002. struct msg_receiver *msr, *t;
  14003. list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
  14004. + /*
  14005. + * Make sure that the wakeup doesn't preempt
  14006. + * this CPU prematurely. (on PREEMPT_RT)
  14007. + */
  14008. + preempt_disable_rt();
  14009. +
  14010. msr->r_msg = NULL; /* initialize expunge ordering */
  14011. wake_up_process(msr->r_tsk);
  14012. /*
  14013. @@ -198,6 +204,8 @@
  14014. */
  14015. smp_mb();
  14016. msr->r_msg = ERR_PTR(res);
  14017. +
  14018. + preempt_enable_rt();
  14019. }
  14020. }
  14021. @@ -574,6 +582,11 @@
  14022. if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
  14023. !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
  14024. msr->r_msgtype, msr->r_mode)) {
  14025. + /*
  14026. + * Make sure that the wakeup doesn't preempt
  14027. + * this CPU prematurely. (on PREEMPT_RT)
  14028. + */
  14029. + preempt_disable_rt();
  14030. list_del(&msr->r_list);
  14031. if (msr->r_maxsize < msg->m_ts) {
  14032. @@ -595,12 +608,13 @@
  14033. */
  14034. smp_mb();
  14035. msr->r_msg = msg;
  14036. + preempt_enable_rt();
  14037. return 1;
  14038. }
  14039. + preempt_enable_rt();
  14040. }
  14041. }
  14042. -
  14043. return 0;
  14044. }
  14045. diff -Nur linux-4.1.39.orig/ipc/sem.c linux-4.1.39/ipc/sem.c
  14046. --- linux-4.1.39.orig/ipc/sem.c 2017-03-13 21:04:36.000000000 +0100
  14047. +++ linux-4.1.39/ipc/sem.c 2017-04-18 17:56:30.617397286 +0200
  14048. @@ -690,6 +690,13 @@
  14049. static void wake_up_sem_queue_prepare(struct list_head *pt,
  14050. struct sem_queue *q, int error)
  14051. {
  14052. +#ifdef CONFIG_PREEMPT_RT_BASE
  14053. + struct task_struct *p = q->sleeper;
  14054. + get_task_struct(p);
  14055. + q->status = error;
  14056. + wake_up_process(p);
  14057. + put_task_struct(p);
  14058. +#else
  14059. if (list_empty(pt)) {
  14060. /*
  14061. * Hold preempt off so that we don't get preempted and have the
  14062. @@ -701,6 +708,7 @@
  14063. q->pid = error;
  14064. list_add_tail(&q->list, pt);
  14065. +#endif
  14066. }
  14067. /**
  14068. @@ -714,6 +722,7 @@
  14069. */
  14070. static void wake_up_sem_queue_do(struct list_head *pt)
  14071. {
  14072. +#ifndef CONFIG_PREEMPT_RT_BASE
  14073. struct sem_queue *q, *t;
  14074. int did_something;
  14075. @@ -726,6 +735,7 @@
  14076. }
  14077. if (did_something)
  14078. preempt_enable();
  14079. +#endif
  14080. }
  14081. static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  14082. diff -Nur linux-4.1.39.orig/kernel/bpf/hashtab.c linux-4.1.39/kernel/bpf/hashtab.c
  14083. --- linux-4.1.39.orig/kernel/bpf/hashtab.c 2017-03-13 21:04:36.000000000 +0100
  14084. +++ linux-4.1.39/kernel/bpf/hashtab.c 2017-04-18 17:56:30.617397286 +0200
  14085. @@ -17,7 +17,7 @@
  14086. struct bpf_htab {
  14087. struct bpf_map map;
  14088. struct hlist_head *buckets;
  14089. - spinlock_t lock;
  14090. + raw_spinlock_t lock;
  14091. u32 count; /* number of elements in this hashtable */
  14092. u32 n_buckets; /* number of hash buckets */
  14093. u32 elem_size; /* size of each element in bytes */
  14094. @@ -82,7 +82,7 @@
  14095. for (i = 0; i < htab->n_buckets; i++)
  14096. INIT_HLIST_HEAD(&htab->buckets[i]);
  14097. - spin_lock_init(&htab->lock);
  14098. + raw_spin_lock_init(&htab->lock);
  14099. htab->count = 0;
  14100. htab->elem_size = sizeof(struct htab_elem) +
  14101. @@ -230,7 +230,7 @@
  14102. l_new->hash = htab_map_hash(l_new->key, key_size);
  14103. /* bpf_map_update_elem() can be called in_irq() */
  14104. - spin_lock_irqsave(&htab->lock, flags);
  14105. + raw_spin_lock_irqsave(&htab->lock, flags);
  14106. head = select_bucket(htab, l_new->hash);
  14107. @@ -266,11 +266,11 @@
  14108. } else {
  14109. htab->count++;
  14110. }
  14111. - spin_unlock_irqrestore(&htab->lock, flags);
  14112. + raw_spin_unlock_irqrestore(&htab->lock, flags);
  14113. return 0;
  14114. err:
  14115. - spin_unlock_irqrestore(&htab->lock, flags);
  14116. + raw_spin_unlock_irqrestore(&htab->lock, flags);
  14117. kfree(l_new);
  14118. return ret;
  14119. }
  14120. @@ -291,7 +291,7 @@
  14121. hash = htab_map_hash(key, key_size);
  14122. - spin_lock_irqsave(&htab->lock, flags);
  14123. + raw_spin_lock_irqsave(&htab->lock, flags);
  14124. head = select_bucket(htab, hash);
  14125. @@ -304,7 +304,7 @@
  14126. ret = 0;
  14127. }
  14128. - spin_unlock_irqrestore(&htab->lock, flags);
  14129. + raw_spin_unlock_irqrestore(&htab->lock, flags);
  14130. return ret;
  14131. }
  14132. diff -Nur linux-4.1.39.orig/kernel/cgroup.c linux-4.1.39/kernel/cgroup.c
  14133. --- linux-4.1.39.orig/kernel/cgroup.c 2017-03-13 21:04:36.000000000 +0100
  14134. +++ linux-4.1.39/kernel/cgroup.c 2017-04-18 17:56:30.617397286 +0200
  14135. @@ -4423,10 +4423,10 @@
  14136. queue_work(cgroup_destroy_wq, &css->destroy_work);
  14137. }
  14138. -static void css_release_work_fn(struct work_struct *work)
  14139. +static void css_release_work_fn(struct swork_event *sev)
  14140. {
  14141. struct cgroup_subsys_state *css =
  14142. - container_of(work, struct cgroup_subsys_state, destroy_work);
  14143. + container_of(sev, struct cgroup_subsys_state, destroy_swork);
  14144. struct cgroup_subsys *ss = css->ss;
  14145. struct cgroup *cgrp = css->cgroup;
  14146. @@ -4465,8 +4465,8 @@
  14147. struct cgroup_subsys_state *css =
  14148. container_of(ref, struct cgroup_subsys_state, refcnt);
  14149. - INIT_WORK(&css->destroy_work, css_release_work_fn);
  14150. - queue_work(cgroup_destroy_wq, &css->destroy_work);
  14151. + INIT_SWORK(&css->destroy_swork, css_release_work_fn);
  14152. + swork_queue(&css->destroy_swork);
  14153. }
  14154. static void init_and_link_css(struct cgroup_subsys_state *css,
  14155. @@ -5080,6 +5080,7 @@
  14156. */
  14157. cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
  14158. BUG_ON(!cgroup_destroy_wq);
  14159. + BUG_ON(swork_get());
  14160. /*
  14161. * Used to destroy pidlists and separate to serve as flush domain.
  14162. diff -Nur linux-4.1.39.orig/kernel/cpu.c linux-4.1.39/kernel/cpu.c
  14163. --- linux-4.1.39.orig/kernel/cpu.c 2017-03-13 21:04:36.000000000 +0100
  14164. +++ linux-4.1.39/kernel/cpu.c 2017-04-18 17:56:30.617397286 +0200
  14165. @@ -74,8 +74,8 @@
  14166. #endif
  14167. } cpu_hotplug = {
  14168. .active_writer = NULL,
  14169. - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  14170. .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  14171. + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
  14172. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  14173. .dep_map = {.name = "cpu_hotplug.lock" },
  14174. #endif
  14175. @@ -88,6 +88,289 @@
  14176. #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
  14177. #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
  14178. +/**
  14179. + * hotplug_pcp - per cpu hotplug descriptor
  14180. + * @unplug: set when pin_current_cpu() needs to sync tasks
  14181. + * @sync_tsk: the task that waits for tasks to finish pinned sections
  14182. + * @refcount: counter of tasks in pinned sections
  14183. + * @grab_lock: set when the tasks entering pinned sections should wait
  14184. + * @synced: notifier for @sync_tsk to tell cpu_down it's finished
  14185. + * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
  14186. + * @mutex_init: zero if the mutex hasn't been initialized yet.
  14187. + *
  14188. + * Although @unplug and @sync_tsk may point to the same task, the @unplug
  14189. + * is used as a flag and still exists after @sync_tsk has exited and
  14190. + * @sync_tsk has been set to NULL.
  14191. + */
  14192. +struct hotplug_pcp {
  14193. + struct task_struct *unplug;
  14194. + struct task_struct *sync_tsk;
  14195. + int refcount;
  14196. + int grab_lock;
  14197. + struct completion synced;
  14198. + struct completion unplug_wait;
  14199. +#ifdef CONFIG_PREEMPT_RT_FULL
  14200. + /*
  14201. + * Note, on PREEMPT_RT, the hotplug lock must save the state of
  14202. + * the task, otherwise the mutex will cause the task to fail
  14203. + * to sleep when required. (Because it's called from migrate_disable())
  14204. + *
  14205. + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
  14206. + * state.
  14207. + */
  14208. + spinlock_t lock;
  14209. +#else
  14210. + struct mutex mutex;
  14211. +#endif
  14212. + int mutex_init;
  14213. +};
  14214. +
  14215. +#ifdef CONFIG_PREEMPT_RT_FULL
  14216. +# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
  14217. +# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
  14218. +#else
  14219. +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
  14220. +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
  14221. +#endif
  14222. +
  14223. +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
  14224. +
  14225. +/**
  14226. + * pin_current_cpu - Prevent the current cpu from being unplugged
  14227. + *
  14228. + * Lightweight version of get_online_cpus() to prevent cpu from being
  14229. + * unplugged when code runs in a migration disabled region.
  14230. + *
  14231. + * Must be called with preemption disabled (preempt_count = 1)!
  14232. + */
  14233. +void pin_current_cpu(void)
  14234. +{
  14235. + struct hotplug_pcp *hp;
  14236. + int force = 0;
  14237. +
  14238. +retry:
  14239. + hp = this_cpu_ptr(&hotplug_pcp);
  14240. +
  14241. + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
  14242. + hp->unplug == current) {
  14243. + hp->refcount++;
  14244. + return;
  14245. + }
  14246. + if (hp->grab_lock) {
  14247. + preempt_enable();
  14248. + hotplug_lock(hp);
  14249. + hotplug_unlock(hp);
  14250. + } else {
  14251. + preempt_enable();
  14252. + /*
  14253. + * Try to push this task off of this CPU.
  14254. + */
  14255. + if (!migrate_me()) {
  14256. + preempt_disable();
  14257. + hp = this_cpu_ptr(&hotplug_pcp);
  14258. + if (!hp->grab_lock) {
  14259. + /*
  14260. + * Just let it continue, it's already pinned
  14261. + * or about to sleep.
  14262. + */
  14263. + force = 1;
  14264. + goto retry;
  14265. + }
  14266. + preempt_enable();
  14267. + }
  14268. + }
  14269. + preempt_disable();
  14270. + goto retry;
  14271. +}
  14272. +
  14273. +/**
  14274. + * unpin_current_cpu - Allow unplug of current cpu
  14275. + *
  14276. + * Must be called with preemption or interrupts disabled!
  14277. + */
  14278. +void unpin_current_cpu(void)
  14279. +{
  14280. + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
  14281. +
  14282. + WARN_ON(hp->refcount <= 0);
  14283. +
  14284. + /* This is safe. sync_unplug_thread is pinned to this cpu */
  14285. + if (!--hp->refcount && hp->unplug && hp->unplug != current)
  14286. + wake_up_process(hp->unplug);
  14287. +}
  14288. +
  14289. +static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
  14290. +{
  14291. + set_current_state(TASK_UNINTERRUPTIBLE);
  14292. + while (hp->refcount) {
  14293. + schedule_preempt_disabled();
  14294. + set_current_state(TASK_UNINTERRUPTIBLE);
  14295. + }
  14296. +}
  14297. +
  14298. +static int sync_unplug_thread(void *data)
  14299. +{
  14300. + struct hotplug_pcp *hp = data;
  14301. +
  14302. + wait_for_completion(&hp->unplug_wait);
  14303. + preempt_disable();
  14304. + hp->unplug = current;
  14305. + wait_for_pinned_cpus(hp);
  14306. +
  14307. + /*
  14308. + * This thread will synchronize the cpu_down() with threads
  14309. + * that have pinned the CPU. When the pinned CPU count reaches
  14310. + * zero, we inform the cpu_down code to continue to the next step.
  14311. + */
  14312. + set_current_state(TASK_UNINTERRUPTIBLE);
  14313. + preempt_enable();
  14314. + complete(&hp->synced);
  14315. +
  14316. + /*
  14317. + * If all succeeds, the next step will need tasks to wait till
  14318. + * the CPU is offline before continuing. To do this, the grab_lock
  14319. + * is set and tasks going into pin_current_cpu() will block on the
  14320. + * mutex. But we still need to wait for those that are already in
  14321. + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
  14322. + * will kick this thread out.
  14323. + */
  14324. + while (!hp->grab_lock && !kthread_should_stop()) {
  14325. + schedule();
  14326. + set_current_state(TASK_UNINTERRUPTIBLE);
  14327. + }
  14328. +
  14329. + /* Make sure grab_lock is seen before we see a stale completion */
  14330. + smp_mb();
  14331. +
  14332. + /*
  14333. + * Now just before cpu_down() enters stop machine, we need to make
  14334. + * sure all tasks that are in pinned CPU sections are out, and new
  14335. + * tasks will now grab the lock, keeping them from entering pinned
  14336. + * CPU sections.
  14337. + */
  14338. + if (!kthread_should_stop()) {
  14339. + preempt_disable();
  14340. + wait_for_pinned_cpus(hp);
  14341. + preempt_enable();
  14342. + complete(&hp->synced);
  14343. + }
  14344. +
  14345. + set_current_state(TASK_UNINTERRUPTIBLE);
  14346. + while (!kthread_should_stop()) {
  14347. + schedule();
  14348. + set_current_state(TASK_UNINTERRUPTIBLE);
  14349. + }
  14350. + set_current_state(TASK_RUNNING);
  14351. +
  14352. + /*
  14353. + * Force this thread off this CPU as it's going down and
  14354. + * we don't want any more work on this CPU.
  14355. + */
  14356. + current->flags &= ~PF_NO_SETAFFINITY;
  14357. + set_cpus_allowed_ptr(current, cpu_present_mask);
  14358. + migrate_me();
  14359. + return 0;
  14360. +}
  14361. +
  14362. +static void __cpu_unplug_sync(struct hotplug_pcp *hp)
  14363. +{
  14364. + wake_up_process(hp->sync_tsk);
  14365. + wait_for_completion(&hp->synced);
  14366. +}
  14367. +
  14368. +static void __cpu_unplug_wait(unsigned int cpu)
  14369. +{
  14370. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14371. +
  14372. + complete(&hp->unplug_wait);
  14373. + wait_for_completion(&hp->synced);
  14374. +}
  14375. +
  14376. +/*
  14377. + * Start the sync_unplug_thread on the target cpu and wait for it to
  14378. + * complete.
  14379. + */
  14380. +static int cpu_unplug_begin(unsigned int cpu)
  14381. +{
  14382. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14383. + int err;
  14384. +
  14385. + /* Protected by cpu_hotplug.lock */
  14386. + if (!hp->mutex_init) {
  14387. +#ifdef CONFIG_PREEMPT_RT_FULL
  14388. + spin_lock_init(&hp->lock);
  14389. +#else
  14390. + mutex_init(&hp->mutex);
  14391. +#endif
  14392. + hp->mutex_init = 1;
  14393. + }
  14394. +
  14395. + /* Inform the scheduler to migrate tasks off this CPU */
  14396. + tell_sched_cpu_down_begin(cpu);
  14397. +
  14398. + init_completion(&hp->synced);
  14399. + init_completion(&hp->unplug_wait);
  14400. +
  14401. + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
  14402. + if (IS_ERR(hp->sync_tsk)) {
  14403. + err = PTR_ERR(hp->sync_tsk);
  14404. + hp->sync_tsk = NULL;
  14405. + return err;
  14406. + }
  14407. + kthread_bind(hp->sync_tsk, cpu);
  14408. +
  14409. + /*
  14410. + * Wait for tasks to get out of the pinned sections,
  14411. + * it's still OK if new tasks enter. Some CPU notifiers will
  14412. + * wait for tasks that are going to enter these sections and
  14413. + * we must not have them block.
  14414. + */
  14415. + wake_up_process(hp->sync_tsk);
  14416. + return 0;
  14417. +}
  14418. +
  14419. +static void cpu_unplug_sync(unsigned int cpu)
  14420. +{
  14421. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14422. +
  14423. + init_completion(&hp->synced);
  14424. + /* The completion needs to be initialized before setting grab_lock */
  14425. + smp_wmb();
  14426. +
  14427. + /* Grab the mutex before setting grab_lock */
  14428. + hotplug_lock(hp);
  14429. + hp->grab_lock = 1;
  14430. +
  14431. + /*
  14432. + * The CPU notifiers have been completed.
  14433. + * Wait for tasks to get out of pinned CPU sections and have new
  14434. + * tasks block until the CPU is completely down.
  14435. + */
  14436. + __cpu_unplug_sync(hp);
  14437. +
  14438. + /* All done with the sync thread */
  14439. + kthread_stop(hp->sync_tsk);
  14440. + hp->sync_tsk = NULL;
  14441. +}
  14442. +
  14443. +static void cpu_unplug_done(unsigned int cpu)
  14444. +{
  14445. + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
  14446. +
  14447. + hp->unplug = NULL;
  14448. + /* Let all tasks know cpu unplug is finished before cleaning up */
  14449. + smp_wmb();
  14450. +
  14451. + if (hp->sync_tsk)
  14452. + kthread_stop(hp->sync_tsk);
  14453. +
  14454. + if (hp->grab_lock) {
  14455. + hotplug_unlock(hp);
  14456. + /* protected by cpu_hotplug.lock */
  14457. + hp->grab_lock = 0;
  14458. + }
  14459. + tell_sched_cpu_down_done(cpu);
  14460. +}
  14461. void get_online_cpus(void)
  14462. {
  14463. @@ -349,13 +632,15 @@
  14464. /* Requires cpu_add_remove_lock to be held */
  14465. static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  14466. {
  14467. - int err, nr_calls = 0;
  14468. + int mycpu, err, nr_calls = 0;
  14469. void *hcpu = (void *)(long)cpu;
  14470. unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
  14471. struct take_cpu_down_param tcd_param = {
  14472. .mod = mod,
  14473. .hcpu = hcpu,
  14474. };
  14475. + cpumask_var_t cpumask;
  14476. + cpumask_var_t cpumask_org;
  14477. if (num_online_cpus() == 1)
  14478. return -EBUSY;
  14479. @@ -363,7 +648,34 @@
  14480. if (!cpu_online(cpu))
  14481. return -EINVAL;
  14482. + /* Move the downtaker off the unplug cpu */
  14483. + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
  14484. + return -ENOMEM;
  14485. + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
  14486. + free_cpumask_var(cpumask);
  14487. + return -ENOMEM;
  14488. + }
  14489. +
  14490. + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
  14491. + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
  14492. + set_cpus_allowed_ptr(current, cpumask);
  14493. + free_cpumask_var(cpumask);
  14494. + migrate_disable();
  14495. + mycpu = smp_processor_id();
  14496. + if (mycpu == cpu) {
  14497. + printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
  14498. + migrate_enable();
  14499. + err = -EBUSY;
  14500. + goto restore_cpus;
  14501. + }
  14502. + migrate_enable();
  14503. +
  14504. cpu_hotplug_begin();
  14505. + err = cpu_unplug_begin(cpu);
  14506. + if (err) {
  14507. + printk("cpu_unplug_begin(%d) failed\n", cpu);
  14508. + goto out_cancel;
  14509. + }
  14510. err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
  14511. if (err) {
  14512. @@ -389,8 +701,12 @@
  14513. #endif
  14514. synchronize_rcu();
  14515. + __cpu_unplug_wait(cpu);
  14516. smpboot_park_threads(cpu);
  14517. + /* Notifiers are done. Don't let any more tasks pin this CPU. */
  14518. + cpu_unplug_sync(cpu);
  14519. +
  14520. /*
  14521. * So now all preempt/rcu users must observe !cpu_active().
  14522. */
  14523. @@ -427,9 +743,14 @@
  14524. check_for_tasks(cpu);
  14525. out_release:
  14526. + cpu_unplug_done(cpu);
  14527. +out_cancel:
  14528. cpu_hotplug_done();
  14529. if (!err)
  14530. cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
  14531. +restore_cpus:
  14532. + set_cpus_allowed_ptr(current, cpumask_org);
  14533. + free_cpumask_var(cpumask_org);
  14534. return err;
  14535. }
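Illustrative sketch, not part of the patch: the pairing that pin_current_cpu()/unpin_current_cpu() above are meant for. In the patch itself these calls sit inside migrate_disable()/migrate_enable(); the fragment below only makes the contract visible: preemption is disabled before pinning, and the unpin may wake the sync_unplug thread when a CPU-down is waiting.

#include <linux/cpu.h>
#include <linux/preempt.h>

static void example_percpu_section(void)
{
	preempt_disable();
	pin_current_cpu();	/* this CPU can no longer be unplugged under us */

	/* ... work that relies on staying on this CPU ... */

	unpin_current_cpu();	/* may wake the hotplug sync thread */
	preempt_enable();
}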
  14536. diff -Nur linux-4.1.39.orig/kernel/cpuset.c linux-4.1.39/kernel/cpuset.c
  14537. --- linux-4.1.39.orig/kernel/cpuset.c 2017-03-13 21:04:36.000000000 +0100
  14538. +++ linux-4.1.39/kernel/cpuset.c 2017-04-18 17:56:30.617397286 +0200
  14539. @@ -284,7 +284,7 @@
  14540. */
  14541. static DEFINE_MUTEX(cpuset_mutex);
  14542. -static DEFINE_SPINLOCK(callback_lock);
  14543. +static DEFINE_RAW_SPINLOCK(callback_lock);
  14544. /*
  14545. * CPU / memory hotplug is handled asynchronously.
  14546. @@ -903,9 +903,9 @@
  14547. continue;
  14548. rcu_read_unlock();
  14549. - spin_lock_irq(&callback_lock);
  14550. + raw_spin_lock_irq(&callback_lock);
  14551. cpumask_copy(cp->effective_cpus, new_cpus);
  14552. - spin_unlock_irq(&callback_lock);
  14553. + raw_spin_unlock_irq(&callback_lock);
  14554. WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
  14555. !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
  14556. @@ -970,9 +970,9 @@
  14557. if (retval < 0)
  14558. return retval;
  14559. - spin_lock_irq(&callback_lock);
  14560. + raw_spin_lock_irq(&callback_lock);
  14561. cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
  14562. - spin_unlock_irq(&callback_lock);
  14563. + raw_spin_unlock_irq(&callback_lock);
  14564. /* use trialcs->cpus_allowed as a temp variable */
  14565. update_cpumasks_hier(cs, trialcs->cpus_allowed);
  14566. @@ -1159,9 +1159,9 @@
  14567. continue;
  14568. rcu_read_unlock();
  14569. - spin_lock_irq(&callback_lock);
  14570. + raw_spin_lock_irq(&callback_lock);
  14571. cp->effective_mems = *new_mems;
  14572. - spin_unlock_irq(&callback_lock);
  14573. + raw_spin_unlock_irq(&callback_lock);
  14574. WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
  14575. !nodes_equal(cp->mems_allowed, cp->effective_mems));
  14576. @@ -1229,9 +1229,9 @@
  14577. if (retval < 0)
  14578. goto done;
  14579. - spin_lock_irq(&callback_lock);
  14580. + raw_spin_lock_irq(&callback_lock);
  14581. cs->mems_allowed = trialcs->mems_allowed;
  14582. - spin_unlock_irq(&callback_lock);
  14583. + raw_spin_unlock_irq(&callback_lock);
  14584. /* use trialcs->mems_allowed as a temp variable */
  14585. update_nodemasks_hier(cs, &trialcs->mems_allowed);
  14586. @@ -1322,9 +1322,9 @@
  14587. spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
  14588. || (is_spread_page(cs) != is_spread_page(trialcs)));
  14589. - spin_lock_irq(&callback_lock);
  14590. + raw_spin_lock_irq(&callback_lock);
  14591. cs->flags = trialcs->flags;
  14592. - spin_unlock_irq(&callback_lock);
  14593. + raw_spin_unlock_irq(&callback_lock);
  14594. if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
  14595. rebuild_sched_domains_locked();
  14596. @@ -1726,7 +1726,7 @@
  14597. cpuset_filetype_t type = seq_cft(sf)->private;
  14598. int ret = 0;
  14599. - spin_lock_irq(&callback_lock);
  14600. + raw_spin_lock_irq(&callback_lock);
  14601. switch (type) {
  14602. case FILE_CPULIST:
  14603. @@ -1745,7 +1745,7 @@
  14604. ret = -EINVAL;
  14605. }
  14606. - spin_unlock_irq(&callback_lock);
  14607. + raw_spin_unlock_irq(&callback_lock);
  14608. return ret;
  14609. }
  14610. @@ -1962,12 +1962,12 @@
  14611. cpuset_inc();
  14612. - spin_lock_irq(&callback_lock);
  14613. + raw_spin_lock_irq(&callback_lock);
  14614. if (cgroup_on_dfl(cs->css.cgroup)) {
  14615. cpumask_copy(cs->effective_cpus, parent->effective_cpus);
  14616. cs->effective_mems = parent->effective_mems;
  14617. }
  14618. - spin_unlock_irq(&callback_lock);
  14619. + raw_spin_unlock_irq(&callback_lock);
  14620. if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
  14621. goto out_unlock;
  14622. @@ -1994,12 +1994,12 @@
  14623. }
  14624. rcu_read_unlock();
  14625. - spin_lock_irq(&callback_lock);
  14626. + raw_spin_lock_irq(&callback_lock);
  14627. cs->mems_allowed = parent->mems_allowed;
  14628. cs->effective_mems = parent->mems_allowed;
  14629. cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
  14630. cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
  14631. - spin_unlock_irq(&callback_lock);
  14632. + raw_spin_unlock_irq(&callback_lock);
  14633. out_unlock:
  14634. mutex_unlock(&cpuset_mutex);
  14635. return 0;
  14636. @@ -2038,7 +2038,7 @@
  14637. static void cpuset_bind(struct cgroup_subsys_state *root_css)
  14638. {
  14639. mutex_lock(&cpuset_mutex);
  14640. - spin_lock_irq(&callback_lock);
  14641. + raw_spin_lock_irq(&callback_lock);
  14642. if (cgroup_on_dfl(root_css->cgroup)) {
  14643. cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
  14644. @@ -2049,7 +2049,7 @@
  14645. top_cpuset.mems_allowed = top_cpuset.effective_mems;
  14646. }
  14647. - spin_unlock_irq(&callback_lock);
  14648. + raw_spin_unlock_irq(&callback_lock);
  14649. mutex_unlock(&cpuset_mutex);
  14650. }
  14651. @@ -2149,12 +2149,12 @@
  14652. {
  14653. bool is_empty;
  14654. - spin_lock_irq(&callback_lock);
  14655. + raw_spin_lock_irq(&callback_lock);
  14656. cpumask_copy(cs->cpus_allowed, new_cpus);
  14657. cpumask_copy(cs->effective_cpus, new_cpus);
  14658. cs->mems_allowed = *new_mems;
  14659. cs->effective_mems = *new_mems;
  14660. - spin_unlock_irq(&callback_lock);
  14661. + raw_spin_unlock_irq(&callback_lock);
  14662. /*
  14663. * Don't call update_tasks_cpumask() if the cpuset becomes empty,
  14664. @@ -2191,10 +2191,10 @@
  14665. if (nodes_empty(*new_mems))
  14666. *new_mems = parent_cs(cs)->effective_mems;
  14667. - spin_lock_irq(&callback_lock);
  14668. + raw_spin_lock_irq(&callback_lock);
  14669. cpumask_copy(cs->effective_cpus, new_cpus);
  14670. cs->effective_mems = *new_mems;
  14671. - spin_unlock_irq(&callback_lock);
  14672. + raw_spin_unlock_irq(&callback_lock);
  14673. if (cpus_updated)
  14674. update_tasks_cpumask(cs);
  14675. @@ -2280,21 +2280,21 @@
  14676. /* synchronize cpus_allowed to cpu_active_mask */
  14677. if (cpus_updated) {
  14678. - spin_lock_irq(&callback_lock);
  14679. + raw_spin_lock_irq(&callback_lock);
  14680. if (!on_dfl)
  14681. cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
  14682. cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
  14683. - spin_unlock_irq(&callback_lock);
  14684. + raw_spin_unlock_irq(&callback_lock);
  14685. /* we don't mess with cpumasks of tasks in top_cpuset */
  14686. }
  14687. /* synchronize mems_allowed to N_MEMORY */
  14688. if (mems_updated) {
  14689. - spin_lock_irq(&callback_lock);
  14690. + raw_spin_lock_irq(&callback_lock);
  14691. if (!on_dfl)
  14692. top_cpuset.mems_allowed = new_mems;
  14693. top_cpuset.effective_mems = new_mems;
  14694. - spin_unlock_irq(&callback_lock);
  14695. + raw_spin_unlock_irq(&callback_lock);
  14696. update_tasks_nodemask(&top_cpuset);
  14697. }
  14698. @@ -2389,11 +2389,11 @@
  14699. {
  14700. unsigned long flags;
  14701. - spin_lock_irqsave(&callback_lock, flags);
  14702. + raw_spin_lock_irqsave(&callback_lock, flags);
  14703. rcu_read_lock();
  14704. guarantee_online_cpus(task_cs(tsk), pmask);
  14705. rcu_read_unlock();
  14706. - spin_unlock_irqrestore(&callback_lock, flags);
  14707. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  14708. }
  14709. void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
  14710. @@ -2441,11 +2441,11 @@
  14711. nodemask_t mask;
  14712. unsigned long flags;
  14713. - spin_lock_irqsave(&callback_lock, flags);
  14714. + raw_spin_lock_irqsave(&callback_lock, flags);
  14715. rcu_read_lock();
  14716. guarantee_online_mems(task_cs(tsk), &mask);
  14717. rcu_read_unlock();
  14718. - spin_unlock_irqrestore(&callback_lock, flags);
  14719. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  14720. return mask;
  14721. }
  14722. @@ -2537,14 +2537,14 @@
  14723. return 1;
  14724. /* Not hardwall and node outside mems_allowed: scan up cpusets */
  14725. - spin_lock_irqsave(&callback_lock, flags);
  14726. + raw_spin_lock_irqsave(&callback_lock, flags);
  14727. rcu_read_lock();
  14728. cs = nearest_hardwall_ancestor(task_cs(current));
  14729. allowed = node_isset(node, cs->mems_allowed);
  14730. rcu_read_unlock();
  14731. - spin_unlock_irqrestore(&callback_lock, flags);
  14732. + raw_spin_unlock_irqrestore(&callback_lock, flags);
  14733. return allowed;
  14734. }
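/*
 * Illustrative sketch, not part of the patch: the conversion pattern used
 * for callback_lock above.  On PREEMPT_RT a plain spinlock_t becomes a
 * sleeping rt_mutex based lock, so a lock that must remain usable from
 * hard atomic context keeps its original semantics by being declared raw.
 * The lock and function names here are made up.
 */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_update(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	/* keep this short and non-sleeping: it really disables IRQs on RT */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}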
  14735. diff -Nur linux-4.1.39.orig/kernel/debug/kdb/kdb_io.c linux-4.1.39/kernel/debug/kdb/kdb_io.c
  14736. --- linux-4.1.39.orig/kernel/debug/kdb/kdb_io.c 2017-03-13 21:04:36.000000000 +0100
  14737. +++ linux-4.1.39/kernel/debug/kdb/kdb_io.c 2017-04-18 17:56:30.617397286 +0200
  14738. @@ -554,7 +554,6 @@
  14739. int linecount;
  14740. int colcount;
  14741. int logging, saved_loglevel = 0;
  14742. - int saved_trap_printk;
  14743. int got_printf_lock = 0;
  14744. int retlen = 0;
  14745. int fnd, len;
  14746. @@ -565,8 +564,6 @@
  14747. unsigned long uninitialized_var(flags);
  14748. preempt_disable();
  14749. - saved_trap_printk = kdb_trap_printk;
  14750. - kdb_trap_printk = 0;
  14751. /* Serialize kdb_printf if multiple cpus try to write at once.
  14752. * But if any cpu goes recursive in kdb, just print the output,
  14753. @@ -855,7 +852,6 @@
  14754. } else {
  14755. __release(kdb_printf_lock);
  14756. }
  14757. - kdb_trap_printk = saved_trap_printk;
  14758. preempt_enable();
  14759. return retlen;
  14760. }
  14761. @@ -865,9 +861,11 @@
  14762. va_list ap;
  14763. int r;
  14764. + kdb_trap_printk++;
  14765. va_start(ap, fmt);
  14766. r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
  14767. va_end(ap);
  14768. + kdb_trap_printk--;
  14769. return r;
  14770. }
  14771. diff -Nur linux-4.1.39.orig/kernel/events/core.c linux-4.1.39/kernel/events/core.c
  14772. --- linux-4.1.39.orig/kernel/events/core.c 2017-03-13 21:04:36.000000000 +0100
  14773. +++ linux-4.1.39/kernel/events/core.c 2017-04-18 17:56:30.621397441 +0200
  14774. @@ -6948,6 +6948,7 @@
  14775. hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  14776. hwc->hrtimer.function = perf_swevent_hrtimer;
  14777. + hwc->hrtimer.irqsafe = 1;
  14778. /*
  14779. * Since hrtimers have a fixed rate, we can do a static freq->period
  14780. diff -Nur linux-4.1.39.orig/kernel/exit.c linux-4.1.39/kernel/exit.c
  14781. --- linux-4.1.39.orig/kernel/exit.c 2017-03-13 21:04:36.000000000 +0100
  14782. +++ linux-4.1.39/kernel/exit.c 2017-04-18 17:56:30.621397441 +0200
  14783. @@ -144,7 +144,7 @@
  14784. * Do this under ->siglock, we can race with another thread
  14785. * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
  14786. */
  14787. - flush_sigqueue(&tsk->pending);
  14788. + flush_task_sigqueue(tsk);
  14789. tsk->sighand = NULL;
  14790. spin_unlock(&sighand->siglock);
  14791. diff -Nur linux-4.1.39.orig/kernel/fork.c linux-4.1.39/kernel/fork.c
  14792. --- linux-4.1.39.orig/kernel/fork.c 2017-03-13 21:04:36.000000000 +0100
  14793. +++ linux-4.1.39/kernel/fork.c 2017-04-18 17:56:30.621397441 +0200
  14794. @@ -108,7 +108,7 @@
  14795. DEFINE_PER_CPU(unsigned long, process_counts) = 0;
  14796. -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
  14797. +DEFINE_RWLOCK(tasklist_lock); /* outer */
  14798. #ifdef CONFIG_PROVE_RCU
  14799. int lockdep_tasklist_lock_is_held(void)
  14800. @@ -244,7 +244,9 @@
  14801. if (atomic_dec_and_test(&sig->sigcnt))
  14802. free_signal_struct(sig);
  14803. }
  14804. -
  14805. +#ifdef CONFIG_PREEMPT_RT_BASE
  14806. +static
  14807. +#endif
  14808. void __put_task_struct(struct task_struct *tsk)
  14809. {
  14810. WARN_ON(!tsk->exit_state);
  14811. @@ -260,7 +262,18 @@
  14812. if (!profile_handoff_task(tsk))
  14813. free_task(tsk);
  14814. }
  14815. +#ifndef CONFIG_PREEMPT_RT_BASE
  14816. EXPORT_SYMBOL_GPL(__put_task_struct);
  14817. +#else
  14818. +void __put_task_struct_cb(struct rcu_head *rhp)
  14819. +{
  14820. + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu);
  14821. +
  14822. + __put_task_struct(tsk);
  14823. +
  14824. +}
  14825. +EXPORT_SYMBOL_GPL(__put_task_struct_cb);
  14826. +#endif
  14827. void __init __weak arch_task_cache_init(void) { }
  14828. @@ -374,6 +387,7 @@
  14829. #endif
  14830. tsk->splice_pipe = NULL;
  14831. tsk->task_frag.page = NULL;
  14832. + tsk->wake_q.next = NULL;
  14833. account_kernel_stack(ti, 1);
  14834. @@ -680,6 +694,19 @@
  14835. }
  14836. EXPORT_SYMBOL_GPL(__mmdrop);
  14837. +#ifdef CONFIG_PREEMPT_RT_BASE
  14838. +/*
  14839. + * RCU callback for delayed mm drop. Not strictly rcu, but we don't
  14840. + * want another facility to make this work.
  14841. + */
  14842. +void __mmdrop_delayed(struct rcu_head *rhp)
  14843. +{
  14844. + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
  14845. +
  14846. + __mmdrop(mm);
  14847. +}
  14848. +#endif
  14849. +
  14850. /*
  14851. * Decrement the use count and release all resources for an mm.
  14852. */
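/*
 * Illustrative sketch, not part of the patch: how the __mmdrop_delayed()
 * callback above is intended to be armed from a context that must not run
 * __mmdrop() directly.  The wrapper name is the editor's;
 * struct mm_struct::delayed_drop and __mmdrop_delayed() come from the
 * patch, mm_count and call_rcu() from the mainline kernel.
 */
#include <linux/mm_types.h>
#include <linux/rcupdate.h>

static inline void example_mmdrop_delayed(struct mm_struct *mm)
{
	if (atomic_dec_and_test(&mm->mm_count))
		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
}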
  14853. @@ -1214,6 +1241,9 @@
  14854. */
  14855. static void posix_cpu_timers_init(struct task_struct *tsk)
  14856. {
  14857. +#ifdef CONFIG_PREEMPT_RT_BASE
  14858. + tsk->posix_timer_list = NULL;
  14859. +#endif
  14860. tsk->cputime_expires.prof_exp = 0;
  14861. tsk->cputime_expires.virt_exp = 0;
  14862. tsk->cputime_expires.sched_exp = 0;
  14863. @@ -1338,6 +1368,7 @@
  14864. spin_lock_init(&p->alloc_lock);
  14865. init_sigpending(&p->pending);
  14866. + p->sigqueue_cache = NULL;
  14867. p->utime = p->stime = p->gtime = 0;
  14868. p->utimescaled = p->stimescaled = 0;
  14869. @@ -1345,7 +1376,8 @@
  14870. p->prev_cputime.utime = p->prev_cputime.stime = 0;
  14871. #endif
  14872. #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  14873. - seqlock_init(&p->vtime_seqlock);
  14874. + raw_spin_lock_init(&p->vtime_lock);
  14875. + seqcount_init(&p->vtime_seq);
  14876. p->vtime_snap = 0;
  14877. p->vtime_snap_whence = VTIME_SLEEPING;
  14878. #endif
  14879. @@ -1396,6 +1428,9 @@
  14880. p->hardirq_context = 0;
  14881. p->softirq_context = 0;
  14882. #endif
  14883. +
  14884. + p->pagefault_disabled = 0;
  14885. +
  14886. #ifdef CONFIG_LOCKDEP
  14887. p->lockdep_depth = 0; /* no locks held yet */
  14888. p->curr_chain_key = 0;
  14889. diff -Nur linux-4.1.39.orig/kernel/futex.c linux-4.1.39/kernel/futex.c
  14890. --- linux-4.1.39.orig/kernel/futex.c 2017-03-13 21:04:36.000000000 +0100
  14891. +++ linux-4.1.39/kernel/futex.c 2017-04-18 17:56:30.621397441 +0200
  14892. @@ -738,7 +738,9 @@
  14893. * task still owns the PI-state:
  14894. */
  14895. if (head->next != next) {
  14896. + raw_spin_unlock_irq(&curr->pi_lock);
  14897. spin_unlock(&hb->lock);
  14898. + raw_spin_lock_irq(&curr->pi_lock);
  14899. continue;
  14900. }
  14901. @@ -1090,9 +1092,11 @@
  14902. /*
  14903. * The hash bucket lock must be held when this is called.
  14904. - * Afterwards, the futex_q must not be accessed.
  14905. + * Afterwards, the futex_q must not be accessed. Callers
  14906. + * must later call wake_up_q() for the actual wakeups
  14907. + * to occur.
  14908. */
  14909. -static void wake_futex(struct futex_q *q)
  14910. +static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
  14911. {
  14912. struct task_struct *p = q->task;
  14913. @@ -1100,14 +1104,10 @@
  14914. return;
  14915. /*
  14916. - * We set q->lock_ptr = NULL _before_ we wake up the task. If
  14917. - * a non-futex wake up happens on another CPU then the task
  14918. - * might exit and p would dereference a non-existing task
  14919. - * struct. Prevent this by holding a reference on p across the
  14920. - * wake up.
  14921. + * Queue the task for later wakeup for after we've released
  14922. + * the hb->lock. wake_q_add() grabs reference to p.
  14923. */
  14924. - get_task_struct(p);
  14925. -
  14926. + wake_q_add(wake_q, p);
  14927. __unqueue_futex(q);
  14928. /*
  14929. * The waiting task can free the futex_q as soon as
  14930. @@ -1117,16 +1117,15 @@
  14931. */
  14932. smp_wmb();
  14933. q->lock_ptr = NULL;
  14934. -
  14935. - wake_up_state(p, TASK_NORMAL);
  14936. - put_task_struct(p);
  14937. }
  14938. -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
  14939. +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
  14940. + struct futex_hash_bucket *hb)
  14941. {
  14942. struct task_struct *new_owner;
  14943. struct futex_pi_state *pi_state = this->pi_state;
  14944. u32 uninitialized_var(curval), newval;
  14945. + bool deboost;
  14946. int ret = 0;
  14947. if (!pi_state)
  14948. @@ -1188,7 +1187,17 @@
  14949. raw_spin_unlock_irq(&new_owner->pi_lock);
  14950. raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
  14951. - rt_mutex_unlock(&pi_state->pi_mutex);
  14952. +
  14953. + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex);
  14954. +
  14955. + /*
  14956. + * We deboost after dropping hb->lock. That prevents a double
  14957. + * wakeup on RT.
  14958. + */
  14959. + spin_unlock(&hb->lock);
  14960. +
  14961. + if (deboost)
  14962. + rt_mutex_adjust_prio(current);
  14963. return 0;
  14964. }
  14965. @@ -1227,6 +1236,7 @@
  14966. struct futex_q *this, *next;
  14967. union futex_key key = FUTEX_KEY_INIT;
  14968. int ret;
  14969. + WAKE_Q(wake_q);
  14970. if (!bitset)
  14971. return -EINVAL;
  14972. @@ -1254,13 +1264,14 @@
  14973. if (!(this->bitset & bitset))
  14974. continue;
  14975. - wake_futex(this);
  14976. + mark_wake_futex(&wake_q, this);
  14977. if (++ret >= nr_wake)
  14978. break;
  14979. }
  14980. }
  14981. spin_unlock(&hb->lock);
  14982. + wake_up_q(&wake_q);
  14983. out_put_key:
  14984. put_futex_key(&key);
  14985. out:
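/*
 * Illustrative sketch, not part of the patch: the deferred wakeup pattern
 * that mark_wake_futex()/wake_up_q() establish above.  Wakeups are only
 * recorded while hb->lock is held and are issued after the lock has been
 * dropped, so a woken task can never immediately block on the still held
 * hash bucket lock.  This would live inside kernel/futex.c next to the
 * code above; the function name is made up.
 */
static void example_wake_one(struct futex_hash_bucket *hb, struct futex_q *q)
{
	WAKE_Q(wake_q);			/* on-stack wake queue */

	spin_lock(&hb->lock);
	mark_wake_futex(&wake_q, q);	/* queue the task, no wakeup yet */
	spin_unlock(&hb->lock);

	wake_up_q(&wake_q);		/* the actual wake_up_process() calls */
}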
  14986. @@ -1279,6 +1290,7 @@
  14987. struct futex_hash_bucket *hb1, *hb2;
  14988. struct futex_q *this, *next;
  14989. int ret, op_ret;
  14990. + WAKE_Q(wake_q);
  14991. retry:
  14992. ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
  14993. @@ -1330,7 +1342,7 @@
  14994. ret = -EINVAL;
  14995. goto out_unlock;
  14996. }
  14997. - wake_futex(this);
  14998. + mark_wake_futex(&wake_q, this);
  14999. if (++ret >= nr_wake)
  15000. break;
  15001. }
  15002. @@ -1344,7 +1356,7 @@
  15003. ret = -EINVAL;
  15004. goto out_unlock;
  15005. }
  15006. - wake_futex(this);
  15007. + mark_wake_futex(&wake_q, this);
  15008. if (++op_ret >= nr_wake2)
  15009. break;
  15010. }
  15011. @@ -1354,6 +1366,7 @@
  15012. out_unlock:
  15013. double_unlock_hb(hb1, hb2);
  15014. + wake_up_q(&wake_q);
  15015. out_put_keys:
  15016. put_futex_key(&key2);
  15017. out_put_key1:
  15018. @@ -1513,6 +1526,7 @@
  15019. struct futex_pi_state *pi_state = NULL;
  15020. struct futex_hash_bucket *hb1, *hb2;
  15021. struct futex_q *this, *next;
  15022. + WAKE_Q(wake_q);
  15023. if (requeue_pi) {
  15024. /*
  15025. @@ -1689,7 +1703,7 @@
  15026. * woken by futex_unlock_pi().
  15027. */
  15028. if (++task_count <= nr_wake && !requeue_pi) {
  15029. - wake_futex(this);
  15030. + mark_wake_futex(&wake_q, this);
  15031. continue;
  15032. }
  15033. @@ -1715,6 +1729,16 @@
  15034. requeue_pi_wake_futex(this, &key2, hb2);
  15035. drop_count++;
  15036. continue;
  15037. + } else if (ret == -EAGAIN) {
  15038. + /*
  15039. + * Waiter was woken by timeout or
  15040. + * signal and has set pi_blocked_on to
  15041. + * PI_WAKEUP_INPROGRESS before we
  15042. + * tried to enqueue it on the rtmutex.
  15043. + */
  15044. + this->pi_state = NULL;
  15045. + free_pi_state(pi_state);
  15046. + continue;
  15047. } else if (ret) {
  15048. /* -EDEADLK */
  15049. this->pi_state = NULL;
  15050. @@ -1729,6 +1753,7 @@
  15051. out_unlock:
  15052. free_pi_state(pi_state);
  15053. double_unlock_hb(hb1, hb2);
  15054. + wake_up_q(&wake_q);
  15055. hb_waiters_dec(hb2);
  15056. /*
  15057. @@ -2422,13 +2447,22 @@
  15058. */
  15059. match = futex_top_waiter(hb, &key);
  15060. if (match) {
  15061. - ret = wake_futex_pi(uaddr, uval, match);
  15062. + ret = wake_futex_pi(uaddr, uval, match, hb);
  15063. +
  15064. + /*
  15065. + * In case of success, wake_futex_pi() dropped the hash
  15066. + * bucket lock.
  15067. + */
  15068. + if (!ret)
  15069. + goto out_putkey;
  15070. +
  15071. /*
  15072. * The atomic access to the futex value generated a
  15073. * pagefault, so retry the user-access and the wakeup:
  15074. */
  15075. if (ret == -EFAULT)
  15076. goto pi_faulted;
  15077. +
  15078. /*
  15079. * A unconditional UNLOCK_PI op raced against a waiter
  15080. * setting the FUTEX_WAITERS bit. Try again.
  15081. @@ -2438,6 +2472,11 @@
  15082. put_futex_key(&key);
  15083. goto retry;
  15084. }
  15085. +
  15086. + /*
  15087. + * wake_futex_pi() has detected an invalid state. Tell
  15088. + * user space.
  15089. + */
  15090. goto out_unlock;
  15091. }
  15092. @@ -2458,6 +2497,7 @@
  15093. out_unlock:
  15094. spin_unlock(&hb->lock);
  15095. +out_putkey:
  15096. put_futex_key(&key);
  15097. return ret;
  15098. @@ -2568,7 +2608,7 @@
  15099. struct hrtimer_sleeper timeout, *to = NULL;
  15100. struct rt_mutex_waiter rt_waiter;
  15101. struct rt_mutex *pi_mutex = NULL;
  15102. - struct futex_hash_bucket *hb;
  15103. + struct futex_hash_bucket *hb, *hb2;
  15104. union futex_key key2 = FUTEX_KEY_INIT;
  15105. struct futex_q q = futex_q_init;
  15106. int res, ret;
  15107. @@ -2593,10 +2633,7 @@
  15108. * The waiter is allocated on our stack, manipulated by the requeue
  15109. * code while we sleep on uaddr.
  15110. */
  15111. - debug_rt_mutex_init_waiter(&rt_waiter);
  15112. - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
  15113. - RB_CLEAR_NODE(&rt_waiter.tree_entry);
  15114. - rt_waiter.task = NULL;
  15115. + rt_mutex_init_waiter(&rt_waiter, false);
  15116. ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
  15117. if (unlikely(ret != 0))
  15118. @@ -2627,20 +2664,55 @@
  15119. /* Queue the futex_q, drop the hb lock, wait for wakeup. */
  15120. futex_wait_queue_me(hb, &q, to);
  15121. - spin_lock(&hb->lock);
  15122. - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  15123. - spin_unlock(&hb->lock);
  15124. - if (ret)
  15125. - goto out_put_keys;
  15126. + /*
  15127. + * On RT we must avoid races with requeue and trying to block
  15128. + * on two mutexes (hb->lock and uaddr2's rtmutex) by
  15129. + * serializing access to pi_blocked_on with pi_lock.
  15130. + */
  15131. + raw_spin_lock_irq(&current->pi_lock);
  15132. + if (current->pi_blocked_on) {
  15133. + /*
  15134. + * We have been requeued or are in the process of
  15135. + * being requeued.
  15136. + */
  15137. + raw_spin_unlock_irq(&current->pi_lock);
  15138. + } else {
  15139. + /*
  15140. + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS
  15141. + * prevents a concurrent requeue from moving us to the
  15142. + * uaddr2 rtmutex. After that we can safely acquire
  15143. + * (and possibly block on) hb->lock.
  15144. + */
  15145. + current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
  15146. + raw_spin_unlock_irq(&current->pi_lock);
  15147. +
  15148. + spin_lock(&hb->lock);
  15149. +
  15150. + /*
  15151. + * Clean up pi_blocked_on. It would otherwise be leaked
  15152. + * if we acquired hb->lock on the fast path without
  15153. + * ever blocking on it.
  15154. + */
  15155. + raw_spin_lock_irq(&current->pi_lock);
  15156. + current->pi_blocked_on = NULL;
  15157. + raw_spin_unlock_irq(&current->pi_lock);
  15158. +
  15159. + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
  15160. + spin_unlock(&hb->lock);
  15161. + if (ret)
  15162. + goto out_put_keys;
  15163. + }
  15164. /*
  15165. - * In order for us to be here, we know our q.key == key2, and since
  15166. - * we took the hb->lock above, we also know that futex_requeue() has
  15167. - * completed and we no longer have to concern ourselves with a wakeup
  15168. - * race with the atomic proxy lock acquisition by the requeue code. The
  15169. - * futex_requeue dropped our key1 reference and incremented our key2
  15170. - * reference count.
  15171. + * In order to be here, we have either been requeued, are in
  15172. + * the process of being requeued, or requeue successfully
  15173. + * acquired uaddr2 on our behalf. If pi_blocked_on was
  15174. + * non-null above, we may be racing with a requeue. Do not
  15175. + * rely on q->lock_ptr to be hb2->lock until after blocking on
  15176. + * hb->lock or hb2->lock. The futex_requeue dropped our key1
  15177. + * reference and incremented our key2 reference count.
  15178. */
  15179. + hb2 = hash_futex(&key2);
  15180. /* Check if the requeue code acquired the second futex for us. */
  15181. if (!q.rt_waiter) {
  15182. @@ -2649,14 +2721,15 @@
  15183. * did a lock-steal - fix up the PI-state in that case.
  15184. */
  15185. if (q.pi_state && (q.pi_state->owner != current)) {
  15186. - spin_lock(q.lock_ptr);
  15187. + spin_lock(&hb2->lock);
  15188. + BUG_ON(&hb2->lock != q.lock_ptr);
  15189. ret = fixup_pi_state_owner(uaddr2, &q, current);
  15190. /*
  15191. * Drop the reference to the pi state which
  15192. * the requeue_pi() code acquired for us.
  15193. */
  15194. free_pi_state(q.pi_state);
  15195. - spin_unlock(q.lock_ptr);
  15196. + spin_unlock(&hb2->lock);
  15197. }
  15198. } else {
  15199. /*
  15200. @@ -2669,7 +2742,8 @@
  15201. ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
  15202. debug_rt_mutex_free_waiter(&rt_waiter);
  15203. - spin_lock(q.lock_ptr);
  15204. + spin_lock(&hb2->lock);
  15205. + BUG_ON(&hb2->lock != q.lock_ptr);
  15206. /*
  15207. * Fixup the pi_state owner and possibly acquire the lock if we
  15208. * haven't already.
  15209. diff -Nur linux-4.1.39.orig/kernel/irq/handle.c linux-4.1.39/kernel/irq/handle.c
  15210. --- linux-4.1.39.orig/kernel/irq/handle.c 2017-03-13 21:04:36.000000000 +0100
  15211. +++ linux-4.1.39/kernel/irq/handle.c 2017-04-18 17:56:30.621397441 +0200
  15212. @@ -133,6 +133,8 @@
  15213. irqreturn_t
  15214. handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
  15215. {
  15216. + struct pt_regs *regs = get_irq_regs();
  15217. + u64 ip = regs ? instruction_pointer(regs) : 0;
  15218. irqreturn_t retval = IRQ_NONE;
  15219. unsigned int flags = 0, irq = desc->irq_data.irq;
  15220. @@ -173,7 +175,11 @@
  15221. action = action->next;
  15222. } while (action);
  15223. - add_interrupt_randomness(irq, flags);
  15224. +#ifndef CONFIG_PREEMPT_RT_FULL
  15225. + add_interrupt_randomness(irq, flags, ip);
  15226. +#else
  15227. + desc->random_ip = ip;
  15228. +#endif
  15229. if (!noirqdebug)
  15230. note_interrupt(irq, desc, retval);
  15231. diff -Nur linux-4.1.39.orig/kernel/irq/manage.c linux-4.1.39/kernel/irq/manage.c
  15232. --- linux-4.1.39.orig/kernel/irq/manage.c 2017-03-13 21:04:36.000000000 +0100
  15233. +++ linux-4.1.39/kernel/irq/manage.c 2017-04-18 17:56:30.621397441 +0200
  15234. @@ -22,6 +22,7 @@
  15235. #include "internals.h"
  15236. #ifdef CONFIG_IRQ_FORCED_THREADING
  15237. +# ifndef CONFIG_PREEMPT_RT_BASE
  15238. __read_mostly bool force_irqthreads;
  15239. static int __init setup_forced_irqthreads(char *arg)
  15240. @@ -30,6 +31,7 @@
  15241. return 0;
  15242. }
  15243. early_param("threadirqs", setup_forced_irqthreads);
  15244. +# endif
  15245. #endif
  15246. static void __synchronize_hardirq(struct irq_desc *desc)
  15247. @@ -179,6 +181,62 @@
  15248. irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
  15249. #endif
  15250. +#ifdef CONFIG_PREEMPT_RT_FULL
  15251. +static void _irq_affinity_notify(struct irq_affinity_notify *notify);
  15252. +static struct task_struct *set_affinity_helper;
  15253. +static LIST_HEAD(affinity_list);
  15254. +static DEFINE_RAW_SPINLOCK(affinity_list_lock);
  15255. +
  15256. +static int set_affinity_thread(void *unused)
  15257. +{
  15258. + while (1) {
  15259. + struct irq_affinity_notify *notify;
  15260. + int empty;
  15261. +
  15262. + set_current_state(TASK_INTERRUPTIBLE);
  15263. +
  15264. + raw_spin_lock_irq(&affinity_list_lock);
  15265. + empty = list_empty(&affinity_list);
  15266. + raw_spin_unlock_irq(&affinity_list_lock);
  15267. +
  15268. + if (empty)
  15269. + schedule();
  15270. + if (kthread_should_stop())
  15271. + break;
  15272. + set_current_state(TASK_RUNNING);
  15273. +try_next:
  15274. + notify = NULL;
  15275. +
  15276. + raw_spin_lock_irq(&affinity_list_lock);
  15277. + if (!list_empty(&affinity_list)) {
  15278. + notify = list_first_entry(&affinity_list,
  15279. + struct irq_affinity_notify, list);
  15280. + list_del_init(&notify->list);
  15281. + }
  15282. + raw_spin_unlock_irq(&affinity_list_lock);
  15283. +
  15284. + if (!notify)
  15285. + continue;
  15286. + _irq_affinity_notify(notify);
  15287. + goto try_next;
  15288. + }
  15289. + return 0;
  15290. +}
  15291. +
  15292. +static void init_helper_thread(void)
  15293. +{
  15294. + if (set_affinity_helper)
  15295. + return;
  15296. + set_affinity_helper = kthread_run(set_affinity_thread, NULL,
  15297. + "affinity-cb");
  15298. + WARN_ON(IS_ERR(set_affinity_helper));
  15299. +}
  15300. +#else
  15301. +
  15302. +static inline void init_helper_thread(void) { }
  15303. +
  15304. +#endif
  15305. +
  15306. int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
  15307. bool force)
  15308. {
  15309. @@ -218,7 +276,17 @@
  15310. if (desc->affinity_notify) {
  15311. kref_get(&desc->affinity_notify->kref);
  15312. +
  15313. +#ifdef CONFIG_PREEMPT_RT_FULL
  15314. + raw_spin_lock(&affinity_list_lock);
  15315. + if (list_empty(&desc->affinity_notify->list))
  15316. + list_add_tail(&affinity_list,
  15317. + &desc->affinity_notify->list);
  15318. + raw_spin_unlock(&affinity_list_lock);
  15319. + wake_up_process(set_affinity_helper);
  15320. +#else
  15321. schedule_work(&desc->affinity_notify->work);
  15322. +#endif
  15323. }
  15324. irqd_set(data, IRQD_AFFINITY_SET);
  15325. @@ -256,10 +324,8 @@
  15326. }
  15327. EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
  15328. -static void irq_affinity_notify(struct work_struct *work)
  15329. +static void _irq_affinity_notify(struct irq_affinity_notify *notify)
  15330. {
  15331. - struct irq_affinity_notify *notify =
  15332. - container_of(work, struct irq_affinity_notify, work);
  15333. struct irq_desc *desc = irq_to_desc(notify->irq);
  15334. cpumask_var_t cpumask;
  15335. unsigned long flags;
  15336. @@ -281,6 +347,13 @@
  15337. kref_put(&notify->kref, notify->release);
  15338. }
  15339. +static void irq_affinity_notify(struct work_struct *work)
  15340. +{
  15341. + struct irq_affinity_notify *notify =
  15342. + container_of(work, struct irq_affinity_notify, work);
  15343. + _irq_affinity_notify(notify);
  15344. +}
  15345. +
  15346. /**
  15347. * irq_set_affinity_notifier - control notification of IRQ affinity changes
  15348. * @irq: Interrupt for which to enable/disable notification
  15349. @@ -310,6 +383,8 @@
  15350. notify->irq = irq;
  15351. kref_init(&notify->kref);
  15352. INIT_WORK(&notify->work, irq_affinity_notify);
  15353. + INIT_LIST_HEAD(&notify->list);
  15354. + init_helper_thread();
  15355. }
  15356. raw_spin_lock_irqsave(&desc->lock, flags);
  15357. @@ -697,6 +772,12 @@
  15358. return IRQ_NONE;
  15359. }
  15360. +static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id)
  15361. +{
  15362. + WARN(1, "Secondary action handler called for irq %d\n", irq);
  15363. + return IRQ_NONE;
  15364. +}
  15365. +
  15366. static int irq_wait_for_interrupt(struct irqaction *action)
  15367. {
  15368. set_current_state(TASK_INTERRUPTIBLE);
  15369. @@ -723,7 +804,8 @@
  15370. static void irq_finalize_oneshot(struct irq_desc *desc,
  15371. struct irqaction *action)
  15372. {
  15373. - if (!(desc->istate & IRQS_ONESHOT))
  15374. + if (!(desc->istate & IRQS_ONESHOT) ||
  15375. + action->handler == irq_forced_secondary_handler)
  15376. return;
  15377. again:
  15378. chip_bus_lock(desc);
  15379. @@ -825,7 +907,15 @@
  15380. local_bh_disable();
  15381. ret = action->thread_fn(action->irq, action->dev_id);
  15382. irq_finalize_oneshot(desc, action);
  15383. - local_bh_enable();
  15384. + /*
  15385. + * Interrupts which have real time requirements can be set up
  15386. + * to avoid softirq processing in the thread handler. This is
  15387. + * safe as these interrupts do not raise soft interrupts.
  15388. + */
  15389. + if (irq_settings_no_softirq_call(desc))
  15390. + _local_bh_enable();
  15391. + else
  15392. + local_bh_enable();
  15393. return ret;
  15394. }
  15395. @@ -877,6 +967,18 @@
  15396. irq_finalize_oneshot(desc, action);
  15397. }
  15398. +static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
  15399. +{
  15400. + struct irqaction *secondary = action->secondary;
  15401. +
  15402. + if (WARN_ON_ONCE(!secondary))
  15403. + return;
  15404. +
  15405. + raw_spin_lock_irq(&desc->lock);
  15406. + __irq_wake_thread(desc, secondary);
  15407. + raw_spin_unlock_irq(&desc->lock);
  15408. +}
  15409. +
  15410. /*
  15411. * Interrupt handler thread
  15412. */
  15413. @@ -907,7 +1009,15 @@
  15414. action_ret = handler_fn(desc, action);
  15415. if (action_ret == IRQ_HANDLED)
  15416. atomic_inc(&desc->threads_handled);
  15417. + if (action_ret == IRQ_WAKE_THREAD)
  15418. + irq_wake_secondary(desc, action);
  15419. +#ifdef CONFIG_PREEMPT_RT_FULL
  15420. + migrate_disable();
  15421. + add_interrupt_randomness(action->irq, 0,
  15422. + desc->random_ip ^ (unsigned long) action);
  15423. + migrate_enable();
  15424. +#endif
  15425. wake_threads_waitq(desc);
  15426. }
  15427. @@ -951,20 +1061,36 @@
  15428. }
  15429. EXPORT_SYMBOL_GPL(irq_wake_thread);
  15430. -static void irq_setup_forced_threading(struct irqaction *new)
  15431. +static int irq_setup_forced_threading(struct irqaction *new)
  15432. {
  15433. if (!force_irqthreads)
  15434. - return;
  15435. + return 0;
  15436. if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
  15437. - return;
  15438. + return 0;
  15439. new->flags |= IRQF_ONESHOT;
  15440. - if (!new->thread_fn) {
  15441. - set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
  15442. - new->thread_fn = new->handler;
  15443. - new->handler = irq_default_primary_handler;
  15444. - }
  15445. + /*
  15446. + * Handle the case where we have a real primary handler and a
  15447. + * thread handler. We force-thread both by creating a
  15448. + * secondary action.
  15449. + */
  15450. + if (new->handler != irq_default_primary_handler && new->thread_fn) {
  15451. + /* Allocate the secondary action */
  15452. + new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
  15453. + if (!new->secondary)
  15454. + return -ENOMEM;
  15455. + new->secondary->handler = irq_forced_secondary_handler;
  15456. + new->secondary->thread_fn = new->thread_fn;
  15457. + new->secondary->dev_id = new->dev_id;
  15458. + new->secondary->irq = new->irq;
  15459. + new->secondary->name = new->name;
  15460. + }
  15461. + /* Deal with the primary handler */
  15462. + set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
  15463. + new->thread_fn = new->handler;
  15464. + new->handler = irq_default_primary_handler;
  15465. + return 0;
  15466. }
  15467. static int irq_request_resources(struct irq_desc *desc)
  15468. @@ -984,6 +1110,48 @@
  15469. c->irq_release_resources(d);
  15470. }
  15471. +static int
  15472. +setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
  15473. +{
  15474. + struct task_struct *t;
  15475. + struct sched_param param = {
  15476. + .sched_priority = MAX_USER_RT_PRIO/2,
  15477. + };
  15478. +
  15479. + if (!secondary) {
  15480. + t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
  15481. + new->name);
  15482. + } else {
  15483. + t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq,
  15484. + new->name);
  15485. + param.sched_priority += 1;
  15486. + }
  15487. +
  15488. + if (IS_ERR(t))
  15489. + return PTR_ERR(t);
  15490. +
  15491. + sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
  15492. +
  15493. + /*
  15494. + * We keep the reference to the task struct even if
  15495. + * the thread dies to avoid that the interrupt code
  15496. + * references an already freed task_struct.
  15497. + */
  15498. + get_task_struct(t);
  15499. + new->thread = t;
  15500. + /*
  15501. + * Tell the thread to set its affinity. This is
  15502. + * important for shared interrupt handlers as we do
  15503. + * not invoke setup_affinity() for the secondary
  15504. + * handlers as everything is already set up. Even for
  15505. + * interrupts marked with IRQF_NO_BALANCE this is
  15506. + * correct as we want the thread to move to the cpu(s)
  15507. + * on which the requesting code placed the interrupt.
  15508. + */
  15509. + set_bit(IRQTF_AFFINITY, &new->thread_flags);
  15510. + return 0;
  15511. +}
  15512. +
  15513. /*
  15514. * Internal function to register an irqaction - typically used to
  15515. * allocate special interrupts that are part of the architecture.
  15516. @@ -1004,6 +1172,8 @@
  15517. if (!try_module_get(desc->owner))
  15518. return -ENODEV;
  15519. + new->irq = irq;
  15520. +
  15521. /*
  15522. * Check whether the interrupt nests into another interrupt
  15523. * thread.
  15524. @@ -1021,8 +1191,11 @@
  15525. */
  15526. new->handler = irq_nested_primary_handler;
  15527. } else {
  15528. - if (irq_settings_can_thread(desc))
  15529. - irq_setup_forced_threading(new);
  15530. + if (irq_settings_can_thread(desc)) {
  15531. + ret = irq_setup_forced_threading(new);
  15532. + if (ret)
  15533. + goto out_mput;
  15534. + }
  15535. }
  15536. /*
  15537. @@ -1031,37 +1204,14 @@
  15538. * thread.
  15539. */
  15540. if (new->thread_fn && !nested) {
  15541. - struct task_struct *t;
  15542. - static const struct sched_param param = {
  15543. - .sched_priority = MAX_USER_RT_PRIO/2,
  15544. - };
  15545. -
  15546. - t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
  15547. - new->name);
  15548. - if (IS_ERR(t)) {
  15549. - ret = PTR_ERR(t);
  15550. + ret = setup_irq_thread(new, irq, false);
  15551. + if (ret)
  15552. goto out_mput;
  15553. + if (new->secondary) {
  15554. + ret = setup_irq_thread(new->secondary, irq, true);
  15555. + if (ret)
  15556. + goto out_thread;
  15557. }
  15558. -
  15559. - sched_setscheduler_nocheck(t, SCHED_FIFO, &param);
  15560. -
  15561. - /*
  15562. - * We keep the reference to the task struct even if
  15563. - * the thread dies to avoid that the interrupt code
  15564. - * references an already freed task_struct.
  15565. - */
  15566. - get_task_struct(t);
  15567. - new->thread = t;
  15568. - /*
  15569. - * Tell the thread to set its affinity. This is
  15570. - * important for shared interrupt handlers as we do
  15571. - * not invoke setup_affinity() for the secondary
  15572. - * handlers as everything is already set up. Even for
  15573. - * interrupts marked with IRQF_NO_BALANCE this is
  15574. - * correct as we want the thread to move to the cpu(s)
  15575. - * on which the requesting code placed the interrupt.
  15576. - */
  15577. - set_bit(IRQTF_AFFINITY, &new->thread_flags);
  15578. }
  15579. if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
  15580. @@ -1221,6 +1371,9 @@
  15581. irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
  15582. }
  15583. + if (new->flags & IRQF_NO_SOFTIRQ_CALL)
  15584. + irq_settings_set_no_softirq_call(desc);
  15585. +
  15586. /* Set default affinity mask once everything is setup */
  15587. setup_affinity(irq, desc, mask);
  15588. @@ -1234,7 +1387,6 @@
  15589. irq, nmsk, omsk);
  15590. }
  15591. - new->irq = irq;
  15592. *old_ptr = new;
  15593. irq_pm_install_action(desc, new);
  15594. @@ -1260,6 +1412,8 @@
  15595. */
  15596. if (new->thread)
  15597. wake_up_process(new->thread);
  15598. + if (new->secondary)
  15599. + wake_up_process(new->secondary->thread);
  15600. register_irq_proc(irq, desc);
  15601. new->dir = NULL;
  15602. @@ -1290,6 +1444,13 @@
  15603. kthread_stop(t);
  15604. put_task_struct(t);
  15605. }
  15606. + if (new->secondary && new->secondary->thread) {
  15607. + struct task_struct *t = new->secondary->thread;
  15608. +
  15609. + new->secondary->thread = NULL;
  15610. + kthread_stop(t);
  15611. + put_task_struct(t);
  15612. + }
  15613. out_mput:
  15614. module_put(desc->owner);
  15615. return ret;
  15616. @@ -1397,9 +1558,14 @@
  15617. if (action->thread) {
  15618. kthread_stop(action->thread);
  15619. put_task_struct(action->thread);
  15620. + if (action->secondary && action->secondary->thread) {
  15621. + kthread_stop(action->secondary->thread);
  15622. + put_task_struct(action->secondary->thread);
  15623. + }
  15624. }
  15625. module_put(desc->owner);
  15626. + kfree(action->secondary);
  15627. return action;
  15628. }
  15629. @@ -1543,8 +1709,10 @@
  15630. retval = __setup_irq(irq, desc, action);
  15631. chip_bus_sync_unlock(desc);
  15632. - if (retval)
  15633. + if (retval) {
  15634. + kfree(action->secondary);
  15635. kfree(action);
  15636. + }
  15637. #ifdef CONFIG_DEBUG_SHIRQ_FIXME
  15638. if (!retval && (irqflags & IRQF_SHARED)) {
  15639. diff -Nur linux-4.1.39.orig/kernel/irq/settings.h linux-4.1.39/kernel/irq/settings.h
  15640. --- linux-4.1.39.orig/kernel/irq/settings.h 2017-03-13 21:04:36.000000000 +0100
  15641. +++ linux-4.1.39/kernel/irq/settings.h 2017-04-18 17:56:30.621397441 +0200
  15642. @@ -15,6 +15,7 @@
  15643. _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
  15644. _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
  15645. _IRQ_IS_POLLED = IRQ_IS_POLLED,
  15646. + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL,
  15647. _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
  15648. };
  15649. @@ -28,6 +29,7 @@
  15650. #define IRQ_NESTED_THREAD GOT_YOU_MORON
  15651. #define IRQ_PER_CPU_DEVID GOT_YOU_MORON
  15652. #define IRQ_IS_POLLED GOT_YOU_MORON
  15653. +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON
  15654. #undef IRQF_MODIFY_MASK
  15655. #define IRQF_MODIFY_MASK GOT_YOU_MORON
  15656. @@ -38,6 +40,16 @@
  15657. desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
  15658. }
  15659. +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc)
  15660. +{
  15661. + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL;
  15662. +}
  15663. +
  15664. +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc)
  15665. +{
  15666. + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL;
  15667. +}
  15668. +
  15669. static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
  15670. {
  15671. return desc->status_use_accessors & _IRQ_PER_CPU;
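/*
 * Illustrative sketch, not part of the patch: a driver opting out of
 * softirq processing in its threaded handler via the IRQF_NO_SOFTIRQ_CALL
 * flag wired up above.  Device name, irq number handling and the handler
 * body are hypothetical.
 */
#include <linux/interrupt.h>

static irqreturn_t example_rt_thread_fn(int irq, void *dev_id)
{
	/* runs with BHs disabled, but never triggers do_softirq() on exit */
	return IRQ_HANDLED;
}

static int example_request(unsigned int irq, void *dev)
{
	return request_threaded_irq(irq, NULL, example_rt_thread_fn,
				    IRQF_ONESHOT | IRQF_NO_SOFTIRQ_CALL,
				    "example-rt-dev", dev);
}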
  15672. diff -Nur linux-4.1.39.orig/kernel/irq/spurious.c linux-4.1.39/kernel/irq/spurious.c
  15673. --- linux-4.1.39.orig/kernel/irq/spurious.c 2017-03-13 21:04:36.000000000 +0100
  15674. +++ linux-4.1.39/kernel/irq/spurious.c 2017-04-18 17:56:30.621397441 +0200
  15675. @@ -444,6 +444,10 @@
  15676. static int __init irqfixup_setup(char *str)
  15677. {
  15678. +#ifdef CONFIG_PREEMPT_RT_BASE
  15679. + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  15680. + return 1;
  15681. +#endif
  15682. irqfixup = 1;
  15683. printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
  15684. printk(KERN_WARNING "This may impact system performance.\n");
  15685. @@ -456,6 +460,10 @@
  15686. static int __init irqpoll_setup(char *str)
  15687. {
  15688. +#ifdef CONFIG_PREEMPT_RT_BASE
  15689. + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
  15690. + return 1;
  15691. +#endif
  15692. irqfixup = 2;
  15693. printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
  15694. "enabled\n");
  15695. diff -Nur linux-4.1.39.orig/kernel/irq_work.c linux-4.1.39/kernel/irq_work.c
  15696. --- linux-4.1.39.orig/kernel/irq_work.c 2017-03-13 21:04:36.000000000 +0100
  15697. +++ linux-4.1.39/kernel/irq_work.c 2017-04-18 17:56:30.621397441 +0200
  15698. @@ -17,6 +17,7 @@
  15699. #include <linux/cpu.h>
  15700. #include <linux/notifier.h>
  15701. #include <linux/smp.h>
  15702. +#include <linux/interrupt.h>
  15703. #include <asm/processor.h>
  15704. @@ -65,6 +66,8 @@
  15705. */
  15706. bool irq_work_queue_on(struct irq_work *work, int cpu)
  15707. {
  15708. + struct llist_head *list;
  15709. +
  15710. /* All work should have been flushed before going offline */
  15711. WARN_ON_ONCE(cpu_is_offline(cpu));
  15712. @@ -75,7 +78,12 @@
  15713. if (!irq_work_claim(work))
  15714. return false;
  15715. - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
  15716. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
  15717. + list = &per_cpu(lazy_list, cpu);
  15718. + else
  15719. + list = &per_cpu(raised_list, cpu);
  15720. +
  15721. + if (llist_add(&work->llnode, list))
  15722. arch_send_call_function_single_ipi(cpu);
  15723. return true;
  15724. @@ -86,6 +94,9 @@
  15725. /* Enqueue the irq work @work on the current CPU */
  15726. bool irq_work_queue(struct irq_work *work)
  15727. {
  15728. + struct llist_head *list;
  15729. + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
  15730. +
  15731. /* Only queue if not already pending */
  15732. if (!irq_work_claim(work))
  15733. return false;
  15734. @@ -93,13 +104,15 @@
  15735. /* Queue the entry and raise the IPI if needed. */
  15736. preempt_disable();
  15737. - /* If the work is "lazy", handle it from next tick if any */
  15738. - if (work->flags & IRQ_WORK_LAZY) {
  15739. - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
  15740. - tick_nohz_tick_stopped())
  15741. - arch_irq_work_raise();
  15742. - } else {
  15743. - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
  15744. + lazy_work = work->flags & IRQ_WORK_LAZY;
  15745. +
  15746. + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
  15747. + list = this_cpu_ptr(&lazy_list);
  15748. + else
  15749. + list = this_cpu_ptr(&raised_list);
  15750. +
  15751. + if (llist_add(&work->llnode, list)) {
  15752. + if (!lazy_work || tick_nohz_tick_stopped())
  15753. arch_irq_work_raise();
  15754. }
  15755. @@ -116,9 +129,8 @@
  15756. raised = this_cpu_ptr(&raised_list);
  15757. lazy = this_cpu_ptr(&lazy_list);
  15758. - if (llist_empty(raised) || arch_irq_work_has_interrupt())
  15759. - if (llist_empty(lazy))
  15760. - return false;
  15761. + if (llist_empty(raised) && llist_empty(lazy))
  15762. + return false;
  15763. /* All work should have been flushed before going offline */
  15764. WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  15765. @@ -132,7 +144,7 @@
  15766. struct irq_work *work;
  15767. struct llist_node *llnode;
  15768. - BUG_ON(!irqs_disabled());
  15769. + BUG_ON_NONRT(!irqs_disabled());
  15770. if (llist_empty(list))
  15771. return;
  15772. @@ -169,7 +181,16 @@
  15773. void irq_work_run(void)
  15774. {
  15775. irq_work_run_list(this_cpu_ptr(&raised_list));
  15776. - irq_work_run_list(this_cpu_ptr(&lazy_list));
  15777. + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
  15778. + /*
  15779. + * NOTE: we raise softirq via IPI for safety,
  15780. + * and execute in irq_work_tick() to move the
  15781. + * overhead from hard to soft irq context.
  15782. + */
  15783. + if (!llist_empty(this_cpu_ptr(&lazy_list)))
  15784. + raise_softirq(TIMER_SOFTIRQ);
  15785. + } else
  15786. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  15787. }
  15788. EXPORT_SYMBOL_GPL(irq_work_run);
  15789. @@ -179,8 +200,17 @@
  15790. if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
  15791. irq_work_run_list(raised);
  15792. +
  15793. + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  15794. + irq_work_run_list(this_cpu_ptr(&lazy_list));
  15795. +}
  15796. +
  15797. +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
  15798. +void irq_work_tick_soft(void)
  15799. +{
  15800. irq_work_run_list(this_cpu_ptr(&lazy_list));
  15801. }
  15802. +#endif
  15803. /*
  15804. * Synchronize against the irq_work @entry, ensures the entry is not
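/*
 * Illustrative sketch, not part of the patch: on PREEMPT_RT_FULL, queued
 * work is diverted to the lazy list (run from TIMER_SOFTIRQ) unless it is
 * explicitly marked IRQ_WORK_HARD_IRQ as below.  The callback and variable
 * names are made up.
 */
#include <linux/irq_work.h>

static void example_hard_irq_work_fn(struct irq_work *work)
{
	/* must be hard-irq safe: no sleeping locks, keep it short */
}

static struct irq_work example_work = {
	.flags = IRQ_WORK_HARD_IRQ,
	.func  = example_hard_irq_work_fn,
};

static void example_kick(void)
{
	irq_work_queue(&example_work);	/* stays on the raised (hard irq) list */
}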
  15805. diff -Nur linux-4.1.39.orig/kernel/Kconfig.locks linux-4.1.39/kernel/Kconfig.locks
  15806. --- linux-4.1.39.orig/kernel/Kconfig.locks 2017-03-13 21:04:36.000000000 +0100
  15807. +++ linux-4.1.39/kernel/Kconfig.locks 2017-04-18 17:56:30.617397286 +0200
  15808. @@ -225,11 +225,11 @@
  15809. config MUTEX_SPIN_ON_OWNER
  15810. def_bool y
  15811. - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
  15812. + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  15813. config RWSEM_SPIN_ON_OWNER
  15814. def_bool y
  15815. - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
  15816. + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL
  15817. config LOCK_SPIN_ON_OWNER
  15818. def_bool y
  15819. diff -Nur linux-4.1.39.orig/kernel/Kconfig.preempt linux-4.1.39/kernel/Kconfig.preempt
  15820. --- linux-4.1.39.orig/kernel/Kconfig.preempt 2017-03-13 21:04:36.000000000 +0100
  15821. +++ linux-4.1.39/kernel/Kconfig.preempt 2017-04-18 17:56:30.617397286 +0200
  15822. @@ -1,3 +1,16 @@
  15823. +config PREEMPT
  15824. + bool
  15825. + select PREEMPT_COUNT
  15826. +
  15827. +config PREEMPT_RT_BASE
  15828. + bool
  15829. + select PREEMPT
  15830. +
  15831. +config HAVE_PREEMPT_LAZY
  15832. + bool
  15833. +
  15834. +config PREEMPT_LAZY
  15835. + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL
  15836. choice
  15837. prompt "Preemption Model"
  15838. @@ -33,9 +46,9 @@
  15839. Select this if you are building a kernel for a desktop system.
  15840. -config PREEMPT
  15841. +config PREEMPT__LL
  15842. bool "Preemptible Kernel (Low-Latency Desktop)"
  15843. - select PREEMPT_COUNT
  15844. + select PREEMPT
  15845. select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
  15846. help
  15847. This option reduces the latency of the kernel by making
  15848. @@ -52,6 +65,22 @@
  15849. embedded system with latency requirements in the milliseconds
  15850. range.
  15851. +config PREEMPT_RTB
  15852. + bool "Preemptible Kernel (Basic RT)"
  15853. + select PREEMPT_RT_BASE
  15854. + help
  15855. + This option is basically the same as the Low-Latency Desktop
  15856. + model, but additionally enables the preliminary changes needed
  15857. + for the fully preemptible RT kernel.
  15858. +
  15859. +config PREEMPT_RT_FULL
  15860. + bool "Fully Preemptible Kernel (RT)"
  15861. + depends on IRQ_FORCED_THREADING
  15862. + select PREEMPT_RT_BASE
  15863. + select PREEMPT_RCU
  15864. + help
  15865. + Select this to enable the full real-time preemption feature set: sleeping spinlocks, forced interrupt threading and preemptible RCU.
  15866. +
  15867. endchoice
  15868. config PREEMPT_COUNT
  15869. diff -Nur linux-4.1.39.orig/kernel/ksysfs.c linux-4.1.39/kernel/ksysfs.c
  15870. --- linux-4.1.39.orig/kernel/ksysfs.c 2017-03-13 21:04:36.000000000 +0100
  15871. +++ linux-4.1.39/kernel/ksysfs.c 2017-04-18 17:56:30.621397441 +0200
  15872. @@ -136,6 +136,15 @@
  15873. #endif /* CONFIG_KEXEC */
  15874. +#if defined(CONFIG_PREEMPT_RT_FULL)
  15875. +static ssize_t realtime_show(struct kobject *kobj,
  15876. + struct kobj_attribute *attr, char *buf)
  15877. +{
  15878. + return sprintf(buf, "%d\n", 1);
  15879. +}
  15880. +KERNEL_ATTR_RO(realtime);
  15881. +#endif
  15882. +
  15883. /* whether file capabilities are enabled */
  15884. static ssize_t fscaps_show(struct kobject *kobj,
  15885. struct kobj_attribute *attr, char *buf)
  15886. @@ -203,6 +212,9 @@
  15887. &vmcoreinfo_attr.attr,
  15888. #endif
  15889. &rcu_expedited_attr.attr,
  15890. +#ifdef CONFIG_PREEMPT_RT_FULL
  15891. + &realtime_attr.attr,
  15892. +#endif
  15893. NULL
  15894. };
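/*
 * Illustrative sketch, not part of the patch: userspace can detect a
 * PREEMPT_RT_FULL kernel through the /sys/kernel/realtime attribute added
 * above (it reads "1" on RT and does not exist otherwise).  Plain
 * userspace C; the function name is the editor's.
 */
#include <stdio.h>

static int kernel_is_realtime(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int rt = 0;

	if (!f)
		return 0;	/* attribute absent on non-RT kernels */
	if (fscanf(f, "%d", &rt) != 1)
		rt = 0;
	fclose(f);
	return rt == 1;
}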
  15895. diff -Nur linux-4.1.39.orig/kernel/locking/lglock.c linux-4.1.39/kernel/locking/lglock.c
  15896. --- linux-4.1.39.orig/kernel/locking/lglock.c 2017-03-13 21:04:36.000000000 +0100
  15897. +++ linux-4.1.39/kernel/locking/lglock.c 2017-04-18 17:56:30.621397441 +0200
  15898. @@ -4,6 +4,15 @@
  15899. #include <linux/cpu.h>
  15900. #include <linux/string.h>
  15901. +#ifndef CONFIG_PREEMPT_RT_FULL
  15902. +# define lg_lock_ptr arch_spinlock_t
  15903. +# define lg_do_lock(l) arch_spin_lock(l)
  15904. +# define lg_do_unlock(l) arch_spin_unlock(l)
  15905. +#else
  15906. +# define lg_lock_ptr struct rt_mutex
  15907. +# define lg_do_lock(l) __rt_spin_lock(l)
  15908. +# define lg_do_unlock(l) __rt_spin_unlock(l)
  15909. +#endif
  15910. /*
  15911. * Note there is no uninit, so lglocks cannot be defined in
  15912. * modules (but it's fine to use them from there)
  15913. @@ -12,51 +21,60 @@
  15914. void lg_lock_init(struct lglock *lg, char *name)
  15915. {
  15916. +#ifdef CONFIG_PREEMPT_RT_FULL
  15917. + int i;
  15918. +
  15919. + for_each_possible_cpu(i) {
  15920. + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i);
  15921. +
  15922. + rt_mutex_init(lock);
  15923. + }
  15924. +#endif
  15925. LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
  15926. }
  15927. EXPORT_SYMBOL(lg_lock_init);
  15928. void lg_local_lock(struct lglock *lg)
  15929. {
  15930. - arch_spinlock_t *lock;
  15931. + lg_lock_ptr *lock;
  15932. - preempt_disable();
  15933. + migrate_disable();
  15934. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  15935. lock = this_cpu_ptr(lg->lock);
  15936. - arch_spin_lock(lock);
  15937. + lg_do_lock(lock);
  15938. }
  15939. EXPORT_SYMBOL(lg_local_lock);
  15940. void lg_local_unlock(struct lglock *lg)
  15941. {
  15942. - arch_spinlock_t *lock;
  15943. + lg_lock_ptr *lock;
  15944. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  15945. lock = this_cpu_ptr(lg->lock);
  15946. - arch_spin_unlock(lock);
  15947. - preempt_enable();
  15948. + lg_do_unlock(lock);
  15949. + migrate_enable();
  15950. }
  15951. EXPORT_SYMBOL(lg_local_unlock);
  15952. void lg_local_lock_cpu(struct lglock *lg, int cpu)
  15953. {
  15954. - arch_spinlock_t *lock;
  15955. + lg_lock_ptr *lock;
  15956. - preempt_disable();
  15957. + preempt_disable_nort();
  15958. lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  15959. lock = per_cpu_ptr(lg->lock, cpu);
  15960. - arch_spin_lock(lock);
  15961. + lg_do_lock(lock);
  15962. }
  15963. EXPORT_SYMBOL(lg_local_lock_cpu);
  15964. void lg_local_unlock_cpu(struct lglock *lg, int cpu)
  15965. {
  15966. - arch_spinlock_t *lock;
  15967. + lg_lock_ptr *lock;
  15968. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  15969. lock = per_cpu_ptr(lg->lock, cpu);
  15970. - arch_spin_unlock(lock);
  15971. - preempt_enable();
  15972. + lg_do_unlock(lock);
  15973. + preempt_enable_nort();
  15974. }
  15975. EXPORT_SYMBOL(lg_local_unlock_cpu);
  15976. @@ -64,12 +82,12 @@
  15977. {
  15978. int i;
  15979. - preempt_disable();
  15980. + preempt_disable_nort();
  15981. lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  15982. for_each_possible_cpu(i) {
  15983. - arch_spinlock_t *lock;
  15984. + lg_lock_ptr *lock;
  15985. lock = per_cpu_ptr(lg->lock, i);
  15986. - arch_spin_lock(lock);
  15987. + lg_do_lock(lock);
  15988. }
  15989. }
  15990. EXPORT_SYMBOL(lg_global_lock);
  15991. @@ -80,10 +98,35 @@
  15992. lock_release(&lg->lock_dep_map, 1, _RET_IP_);
  15993. for_each_possible_cpu(i) {
  15994. - arch_spinlock_t *lock;
  15995. + lg_lock_ptr *lock;
  15996. lock = per_cpu_ptr(lg->lock, i);
  15997. - arch_spin_unlock(lock);
  15998. + lg_do_unlock(lock);
  15999. }
  16000. - preempt_enable();
  16001. + preempt_enable_nort();
  16002. }
  16003. EXPORT_SYMBOL(lg_global_unlock);
  16004. +
  16005. +#ifdef CONFIG_PREEMPT_RT_FULL
  16006. +/*
  16007. + * HACK: If you use this, you get to keep the pieces.
  16008. + * Used in queue_stop_cpus_work() when the stop machinery
  16009. + * is called from an inactive CPU, so we can't schedule.
  16010. + */
  16011. +# define lg_do_trylock_relax(l) \
  16012. + do { \
  16013. + while (!__rt_spin_trylock(l)) \
  16014. + cpu_relax(); \
  16015. + } while (0)
  16016. +
  16017. +void lg_global_trylock_relax(struct lglock *lg)
  16018. +{
  16019. + int i;
  16020. +
  16021. + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
  16022. + for_each_possible_cpu(i) {
  16023. + lg_lock_ptr *lock;
  16024. + lock = per_cpu_ptr(lg->lock, i);
  16025. + lg_do_trylock_relax(lock);
  16026. + }
  16027. +}
  16028. +#endif
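/*
 * Illustrative sketch, not part of the patch: lglock usage is unchanged by
 * the conversion above; only the per-CPU lock type behind it differs
 * (arch_spinlock_t on !RT, rt_mutex on RT, per lg_lock_ptr).  Names are
 * made up; lg_lock_init() must run before first use so the RT variant's
 * per-CPU rt_mutexes are initialised.
 */
#include <linux/lglock.h>

static DEFINE_LGLOCK(example_lg);

static int __init example_lg_init(void)
{
	lg_lock_init(&example_lg, "example_lg");
	return 0;
}

static void example_reader(void)
{
	lg_local_lock(&example_lg);	/* migrate_disable() + this CPU's lock on RT */
	/* per-CPU fast path */
	lg_local_unlock(&example_lg);
}

static void example_writer(void)
{
	lg_global_lock(&example_lg);	/* takes every CPU's lock */
	/* global slow path */
	lg_global_unlock(&example_lg);
}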
  16029. diff -Nur linux-4.1.39.orig/kernel/locking/lockdep.c linux-4.1.39/kernel/locking/lockdep.c
  16030. --- linux-4.1.39.orig/kernel/locking/lockdep.c 2017-03-13 21:04:36.000000000 +0100
  16031. +++ linux-4.1.39/kernel/locking/lockdep.c 2017-04-18 17:56:30.621397441 +0200
  16032. @@ -668,6 +668,7 @@
  16033. struct lockdep_subclass_key *key;
  16034. struct list_head *hash_head;
  16035. struct lock_class *class;
  16036. + bool is_static = false;
  16037. #ifdef CONFIG_DEBUG_LOCKDEP
  16038. /*
  16039. @@ -695,10 +696,23 @@
  16040. /*
  16041. * Static locks do not have their class-keys yet - for them the key
  16042. - * is the lock object itself:
  16043. - */
  16044. - if (unlikely(!lock->key))
  16045. - lock->key = (void *)lock;
  16046. + * is the lock object itself. If the lock is in the per cpu area,
  16047. + * the canonical address of the lock (per cpu offset removed) is
  16048. + * used.
  16049. + */
  16050. + if (unlikely(!lock->key)) {
  16051. + unsigned long can_addr, addr = (unsigned long)lock;
  16052. +
  16053. + if (__is_kernel_percpu_address(addr, &can_addr))
  16054. + lock->key = (void *)can_addr;
  16055. + else if (__is_module_percpu_address(addr, &can_addr))
  16056. + lock->key = (void *)can_addr;
  16057. + else if (static_obj(lock))
  16058. + lock->key = (void *)lock;
  16059. + else
  16060. + return ERR_PTR(-EINVAL);
  16061. + is_static = true;
  16062. + }
  16063. /*
  16064. * NOTE: the class-key must be unique. For dynamic locks, a static
  16065. @@ -730,7 +744,7 @@
  16066. }
  16067. }
  16068. - return NULL;
  16069. + return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
  16070. }
  16071. /*
  16072. @@ -748,19 +762,18 @@
  16073. DEBUG_LOCKS_WARN_ON(!irqs_disabled());
  16074. class = look_up_lock_class(lock, subclass);
  16075. - if (likely(class))
  16076. + if (likely(!IS_ERR_OR_NULL(class)))
  16077. goto out_set_class_cache;
  16078. /*
  16079. * Debug-check: all keys must be persistent!
  16080. - */
  16081. - if (!static_obj(lock->key)) {
  16082. + */
  16083. + if (IS_ERR(class)) {
  16084. debug_locks_off();
  16085. printk("INFO: trying to register non-static key.\n");
  16086. printk("the code is fine but needs lockdep annotation.\n");
  16087. printk("turning off the locking correctness validator.\n");
  16088. dump_stack();
  16089. -
  16090. return NULL;
  16091. }
  16092. @@ -3297,7 +3310,7 @@
  16093. * Clearly if the lock hasn't been acquired _ever_, we're not
  16094. * holding it either, so report failure.
  16095. */
  16096. - if (!class)
  16097. + if (IS_ERR_OR_NULL(class))
  16098. return 0;
  16099. /*
  16100. @@ -3563,6 +3576,7 @@
  16101. }
  16102. }
  16103. +#ifndef CONFIG_PREEMPT_RT_FULL
  16104. /*
  16105. * We dont accurately track softirq state in e.g.
  16106. * hardirq contexts (such as on 4KSTACKS), so only
  16107. @@ -3577,6 +3591,7 @@
  16108. DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  16109. }
  16110. }
  16111. +#endif
  16112. if (!debug_locks)
  16113. print_irqtrace_events(current);
  16114. @@ -3980,7 +3995,7 @@
  16115. * If the class exists we look it up and zap it:
  16116. */
  16117. class = look_up_lock_class(lock, j);
  16118. - if (class)
  16119. + if (!IS_ERR_OR_NULL(class))
  16120. zap_class(class);
  16121. }
  16122. /*
  16123. diff -Nur linux-4.1.39.orig/kernel/locking/locktorture.c linux-4.1.39/kernel/locking/locktorture.c
  16124. --- linux-4.1.39.orig/kernel/locking/locktorture.c 2017-03-13 21:04:36.000000000 +0100
  16125. +++ linux-4.1.39/kernel/locking/locktorture.c 2017-04-18 17:56:30.621397441 +0200
  16126. @@ -24,7 +24,6 @@
  16127. #include <linux/module.h>
  16128. #include <linux/kthread.h>
  16129. #include <linux/spinlock.h>
  16130. -#include <linux/rwlock.h>
  16131. #include <linux/mutex.h>
  16132. #include <linux/rwsem.h>
  16133. #include <linux/smp.h>
  16134. diff -Nur linux-4.1.39.orig/kernel/locking/Makefile linux-4.1.39/kernel/locking/Makefile
  16135. --- linux-4.1.39.orig/kernel/locking/Makefile 2017-03-13 21:04:36.000000000 +0100
  16136. +++ linux-4.1.39/kernel/locking/Makefile 2017-04-18 17:56:30.621397441 +0200
  16137. @@ -1,5 +1,5 @@
  16138. -obj-y += mutex.o semaphore.o rwsem.o
  16139. +obj-y += semaphore.o
  16140. ifdef CONFIG_FUNCTION_TRACER
  16141. CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
  16142. @@ -8,7 +8,11 @@
  16143. CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
  16144. endif
  16145. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  16146. +obj-y += mutex.o
  16147. obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
  16148. +obj-y += rwsem.o
  16149. +endif
  16150. obj-$(CONFIG_LOCKDEP) += lockdep.o
  16151. ifeq ($(CONFIG_PROC_FS),y)
  16152. obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
  16153. @@ -22,8 +26,11 @@
  16154. obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
  16155. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
  16156. obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
  16157. +ifneq ($(CONFIG_PREEMPT_RT_FULL),y)
  16158. obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
  16159. obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
  16160. +endif
  16161. obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
  16162. +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o
  16163. obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
  16164. obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
  16165. diff -Nur linux-4.1.39.orig/kernel/locking/rt.c linux-4.1.39/kernel/locking/rt.c
  16166. --- linux-4.1.39.orig/kernel/locking/rt.c 1970-01-01 01:00:00.000000000 +0100
  16167. +++ linux-4.1.39/kernel/locking/rt.c 2017-04-18 17:56:30.621397441 +0200
  16168. @@ -0,0 +1,461 @@
  16169. +/*
  16170. + * kernel/rt.c
  16171. + *
  16172. + * Real-Time Preemption Support
  16173. + *
  16174. + * started by Ingo Molnar:
  16175. + *
  16176. + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  16177. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16178. + *
  16179. + * historic credit for proving that Linux spinlocks can be implemented via
  16180. + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow
  16181. + * and others) who prototyped it on 2.4 and did lots of comparative
  16182. + * research and analysis; TimeSys, for proving that you can implement a
  16183. + * fully preemptible kernel via the use of IRQ threading and mutexes;
  16184. + * Bill Huey for persuasively arguing on lkml that the mutex model is the
  16185. + * right one; and to MontaVista, who ported pmutexes to 2.6.
  16186. + *
  16187. + * This code is a from-scratch implementation and is not based on pmutexes,
  16188. + * but the idea of converting spinlocks to mutexes is used here too.
  16189. + *
  16190. + * lock debugging, locking tree, deadlock detection:
  16191. + *
  16192. + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
  16193. + * Released under the General Public License (GPL).
  16194. + *
  16195. + * Includes portions of the generic R/W semaphore implementation from:
  16196. + *
  16197. + * Copyright (c) 2001 David Howells (dhowells@redhat.com).
  16198. + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
  16199. + * - Derived also from comments by Linus
  16200. + *
  16201. + * Pending ownership of locks and ownership stealing:
  16202. + *
  16203. + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt
  16204. + *
  16205. + * (also by Steven Rostedt)
  16206. + * - Converted single pi_lock to individual task locks.
  16207. + *
  16208. + * By Esben Nielsen:
  16209. + * Doing priority inheritance with help of the scheduler.
  16210. + *
  16211. + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16212. + * - major rework based on Esben Nielsen's initial patch
  16213. + * - replaced thread_info references by task_struct refs
  16214. + * - removed task->pending_owner dependency
  16215. + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks
  16216. + * in the scheduler return path as discussed with Steven Rostedt
  16217. + *
  16218. + * Copyright (C) 2006, Kihon Technologies Inc.
  16219. + * Steven Rostedt <rostedt@goodmis.org>
  16220. + * - debugged and patched Thomas Gleixner's rework.
  16221. + * - added back the cmpxchg to the rework.
  16222. + * - turned atomic require back on for SMP.
  16223. + */
  16224. +
  16225. +#include <linux/spinlock.h>
  16226. +#include <linux/rtmutex.h>
  16227. +#include <linux/sched.h>
  16228. +#include <linux/delay.h>
  16229. +#include <linux/module.h>
  16230. +#include <linux/kallsyms.h>
  16231. +#include <linux/syscalls.h>
  16232. +#include <linux/interrupt.h>
  16233. +#include <linux/plist.h>
  16234. +#include <linux/fs.h>
  16235. +#include <linux/futex.h>
  16236. +#include <linux/hrtimer.h>
  16237. +
  16238. +#include "rtmutex_common.h"
  16239. +
  16240. +/*
  16241. + * struct mutex functions
  16242. + */
  16243. +void __mutex_do_init(struct mutex *mutex, const char *name,
  16244. + struct lock_class_key *key)
  16245. +{
  16246. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16247. + /*
  16248. + * Make sure we are not reinitializing a held lock:
  16249. + */
  16250. + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
  16251. + lockdep_init_map(&mutex->dep_map, name, key, 0);
  16252. +#endif
  16253. + mutex->lock.save_state = 0;
  16254. +}
  16255. +EXPORT_SYMBOL(__mutex_do_init);
  16256. +
  16257. +void __lockfunc _mutex_lock(struct mutex *lock)
  16258. +{
  16259. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16260. + rt_mutex_lock(&lock->lock);
  16261. +}
  16262. +EXPORT_SYMBOL(_mutex_lock);
  16263. +
  16264. +int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
  16265. +{
  16266. + int ret;
  16267. +
  16268. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16269. + ret = rt_mutex_lock_interruptible(&lock->lock);
  16270. + if (ret)
  16271. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16272. + return ret;
  16273. +}
  16274. +EXPORT_SYMBOL(_mutex_lock_interruptible);
  16275. +
  16276. +int __lockfunc _mutex_lock_killable(struct mutex *lock)
  16277. +{
  16278. + int ret;
  16279. +
  16280. + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16281. + ret = rt_mutex_lock_killable(&lock->lock);
  16282. + if (ret)
  16283. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16284. + return ret;
  16285. +}
  16286. +EXPORT_SYMBOL(_mutex_lock_killable);
  16287. +
  16288. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16289. +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
  16290. +{
  16291. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  16292. + rt_mutex_lock(&lock->lock);
  16293. +}
  16294. +EXPORT_SYMBOL(_mutex_lock_nested);
  16295. +
  16296. +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
  16297. +{
  16298. + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
  16299. + rt_mutex_lock(&lock->lock);
  16300. +}
  16301. +EXPORT_SYMBOL(_mutex_lock_nest_lock);
  16302. +
  16303. +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass)
  16304. +{
  16305. + int ret;
  16306. +
  16307. + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
  16308. + ret = rt_mutex_lock_interruptible(&lock->lock);
  16309. + if (ret)
  16310. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16311. + return ret;
  16312. +}
  16313. +EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
  16314. +
  16315. +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass)
  16316. +{
  16317. + int ret;
  16318. +
  16319. + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  16320. + ret = rt_mutex_lock_killable(&lock->lock);
  16321. + if (ret)
  16322. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16323. + return ret;
  16324. +}
  16325. +EXPORT_SYMBOL(_mutex_lock_killable_nested);
  16326. +#endif
  16327. +
  16328. +int __lockfunc _mutex_trylock(struct mutex *lock)
  16329. +{
  16330. + int ret = rt_mutex_trylock(&lock->lock);
  16331. +
  16332. + if (ret)
  16333. + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  16334. +
  16335. + return ret;
  16336. +}
  16337. +EXPORT_SYMBOL(_mutex_trylock);
  16338. +
  16339. +void __lockfunc _mutex_unlock(struct mutex *lock)
  16340. +{
  16341. + mutex_release(&lock->dep_map, 1, _RET_IP_);
  16342. + rt_mutex_unlock(&lock->lock);
  16343. +}
  16344. +EXPORT_SYMBOL(_mutex_unlock);
  16345. +
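
All of the mutex wrappers above follow one shape: take the lockdep annotation, call the underlying rt_mutex operation, and back the annotation out again if the lock was not actually acquired. A small userspace analogue of that annotate-then-undo pattern, with printf tracing standing in for lockdep (names are made up for the sketch):

#include <pthread.h>
#include <stdio.h>

static void trace_acquire(const char *name) { printf("acquire %s\n", name); }
static void trace_release(const char *name) { printf("release %s\n", name); }

struct traced_mutex {
        pthread_mutex_t m;
        const char *name;
};

/* annotate first, back the annotation out if the lock was not actually taken */
static int traced_trylock(struct traced_mutex *tm)
{
        trace_acquire(tm->name);                /* plays the role of mutex_acquire() */
        if (pthread_mutex_trylock(&tm->m)) {
                trace_release(tm->name);        /* failed: undo, like mutex_release() */
                return 0;
        }
        return 1;
}

int main(void)
{
        struct traced_mutex tm = { PTHREAD_MUTEX_INITIALIZER, "demo" };

        if (traced_trylock(&tm))
                pthread_mutex_unlock(&tm.m);
        return 0;
}
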
  16346. +/*
  16347. + * rwlock_t functions
  16348. + */
  16349. +int __lockfunc rt_write_trylock(rwlock_t *rwlock)
  16350. +{
  16351. + int ret;
  16352. +
  16353. + migrate_disable();
  16354. + ret = rt_mutex_trylock(&rwlock->lock);
  16355. + if (ret)
  16356. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  16357. + else
  16358. + migrate_enable();
  16359. +
  16360. + return ret;
  16361. +}
  16362. +EXPORT_SYMBOL(rt_write_trylock);
  16363. +
  16364. +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags)
  16365. +{
  16366. + int ret;
  16367. +
  16368. + *flags = 0;
  16369. + ret = rt_write_trylock(rwlock);
  16370. + return ret;
  16371. +}
  16372. +EXPORT_SYMBOL(rt_write_trylock_irqsave);
  16373. +
  16374. +int __lockfunc rt_read_trylock(rwlock_t *rwlock)
  16375. +{
  16376. + struct rt_mutex *lock = &rwlock->lock;
  16377. + int ret = 1;
  16378. +
  16379. + /*
  16380. + * recursive read locks succeed when current owns the lock,
  16381. + * but not when read_depth == 0 which means that the lock is
  16382. + * write locked.
  16383. + */
  16384. + if (rt_mutex_owner(lock) != current) {
  16385. + migrate_disable();
  16386. + ret = rt_mutex_trylock(lock);
  16387. + if (ret)
  16388. + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
  16389. + else
  16390. + migrate_enable();
  16391. +
  16392. + } else if (!rwlock->read_depth) {
  16393. + ret = 0;
  16394. + }
  16395. +
  16396. + if (ret)
  16397. + rwlock->read_depth++;
  16398. +
  16399. + return ret;
  16400. +}
  16401. +EXPORT_SYMBOL(rt_read_trylock);
  16402. +
  16403. +void __lockfunc rt_write_lock(rwlock_t *rwlock)
  16404. +{
  16405. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  16406. + migrate_disable();
  16407. + __rt_spin_lock(&rwlock->lock);
  16408. +}
  16409. +EXPORT_SYMBOL(rt_write_lock);
  16410. +
  16411. +void __lockfunc rt_read_lock(rwlock_t *rwlock)
  16412. +{
  16413. + struct rt_mutex *lock = &rwlock->lock;
  16414. +
  16415. +
  16416. + /*
  16417. + * recursive read locks succeed when current owns the lock
  16418. + */
  16419. + if (rt_mutex_owner(lock) != current) {
  16420. + migrate_disable();
  16421. + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
  16422. + __rt_spin_lock(lock);
  16423. + }
  16424. + rwlock->read_depth++;
  16425. +}
  16426. +
  16427. +EXPORT_SYMBOL(rt_read_lock);
  16428. +
  16429. +void __lockfunc rt_write_unlock(rwlock_t *rwlock)
  16430. +{
  16431. + /* NOTE: we always pass in '1' for nested, for simplicity */
  16432. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  16433. + __rt_spin_unlock(&rwlock->lock);
  16434. + migrate_enable();
  16435. +}
  16436. +EXPORT_SYMBOL(rt_write_unlock);
  16437. +
  16438. +void __lockfunc rt_read_unlock(rwlock_t *rwlock)
  16439. +{
  16440. + /* Release the lock only when read_depth is down to 0 */
  16441. + if (--rwlock->read_depth == 0) {
  16442. + rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
  16443. + __rt_spin_unlock(&rwlock->lock);
  16444. + migrate_enable();
  16445. + }
  16446. +}
  16447. +EXPORT_SYMBOL(rt_read_unlock);
  16448. +
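
The read_depth bookkeeping above lets a task re-enter a read lock it already holds without touching the underlying rtmutex again. A single-threaded userspace illustration of just that bookkeeping (it is not a race-free lock; the kernel relies on rt_mutex_owner() == current, modelled here with pthread_self()):

#include <pthread.h>
#include <stdio.h>

struct rw_like {
        pthread_mutex_t m;      /* stands in for the underlying rtmutex */
        pthread_t owner;
        int read_depth;
};

static void read_lock(struct rw_like *l)
{
        if (!l->read_depth || !pthread_equal(l->owner, pthread_self())) {
                pthread_mutex_lock(&l->m);
                l->owner = pthread_self();
        }
        l->read_depth++;        /* recursive acquisition only bumps the depth */
}

static void read_unlock(struct rw_like *l)
{
        if (--l->read_depth == 0)
                pthread_mutex_unlock(&l->m);
}

int main(void)
{
        struct rw_like l = { .m = PTHREAD_MUTEX_INITIALIZER };

        read_lock(&l);
        read_lock(&l);                          /* nested read, no second lock */
        printf("depth=%d\n", l.read_depth);     /* prints 2 */
        read_unlock(&l);
        read_unlock(&l);                        /* last unlock drops the mutex */
        return 0;
}
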
  16449. +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock)
  16450. +{
  16451. + rt_write_lock(rwlock);
  16452. +
  16453. + return 0;
  16454. +}
  16455. +EXPORT_SYMBOL(rt_write_lock_irqsave);
  16456. +
  16457. +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock)
  16458. +{
  16459. + rt_read_lock(rwlock);
  16460. +
  16461. + return 0;
  16462. +}
  16463. +EXPORT_SYMBOL(rt_read_lock_irqsave);
  16464. +
  16465. +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
  16466. +{
  16467. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16468. + /*
  16469. + * Make sure we are not reinitializing a held lock:
  16470. + */
  16471. + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
  16472. + lockdep_init_map(&rwlock->dep_map, name, key, 0);
  16473. +#endif
  16474. + rwlock->lock.save_state = 1;
  16475. + rwlock->read_depth = 0;
  16476. +}
  16477. +EXPORT_SYMBOL(__rt_rwlock_init);
  16478. +
  16479. +/*
  16480. + * rw_semaphores
  16481. + */
  16482. +
  16483. +void rt_up_write(struct rw_semaphore *rwsem)
  16484. +{
  16485. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  16486. + rt_mutex_unlock(&rwsem->lock);
  16487. +}
  16488. +EXPORT_SYMBOL(rt_up_write);
  16489. +
  16490. +void __rt_up_read(struct rw_semaphore *rwsem)
  16491. +{
  16492. + if (--rwsem->read_depth == 0)
  16493. + rt_mutex_unlock(&rwsem->lock);
  16494. +}
  16495. +
  16496. +void rt_up_read(struct rw_semaphore *rwsem)
  16497. +{
  16498. + rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
  16499. + __rt_up_read(rwsem);
  16500. +}
  16501. +EXPORT_SYMBOL(rt_up_read);
  16502. +
  16503. +/*
  16504. + * downgrade a write lock into a read lock
  16505. + * - just wake up any readers at the front of the queue
  16506. + */
  16507. +void rt_downgrade_write(struct rw_semaphore *rwsem)
  16508. +{
  16509. + BUG_ON(rt_mutex_owner(&rwsem->lock) != current);
  16510. + rwsem->read_depth = 1;
  16511. +}
  16512. +EXPORT_SYMBOL(rt_downgrade_write);
  16513. +
  16514. +int rt_down_write_trylock(struct rw_semaphore *rwsem)
  16515. +{
  16516. + int ret = rt_mutex_trylock(&rwsem->lock);
  16517. +
  16518. + if (ret)
  16519. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  16520. + return ret;
  16521. +}
  16522. +EXPORT_SYMBOL(rt_down_write_trylock);
  16523. +
  16524. +void rt_down_write(struct rw_semaphore *rwsem)
  16525. +{
  16526. + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
  16527. + rt_mutex_lock(&rwsem->lock);
  16528. +}
  16529. +EXPORT_SYMBOL(rt_down_write);
  16530. +
  16531. +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
  16532. +{
  16533. + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
  16534. + rt_mutex_lock(&rwsem->lock);
  16535. +}
  16536. +EXPORT_SYMBOL(rt_down_write_nested);
  16537. +
  16538. +void rt_down_write_nested_lock(struct rw_semaphore *rwsem,
  16539. + struct lockdep_map *nest)
  16540. +{
  16541. + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_);
  16542. + rt_mutex_lock(&rwsem->lock);
  16543. +}
  16544. +EXPORT_SYMBOL(rt_down_write_nested_lock);
  16545. +
  16546. +int rt_down_read_trylock(struct rw_semaphore *rwsem)
  16547. +{
  16548. + struct rt_mutex *lock = &rwsem->lock;
  16549. + int ret = 1;
  16550. +
  16551. + /*
  16552. + * recursive read locks succeed when current owns the rwsem,
  16553. + * but not when read_depth == 0 which means that the rwsem is
  16554. + * write locked.
  16555. + */
  16556. + if (rt_mutex_owner(lock) != current)
  16557. + ret = rt_mutex_trylock(&rwsem->lock);
  16558. + else if (!rwsem->read_depth)
  16559. + ret = 0;
  16560. +
  16561. + if (ret) {
  16562. + rwsem->read_depth++;
  16563. + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
  16564. + }
  16565. + return ret;
  16566. +}
  16567. +EXPORT_SYMBOL(rt_down_read_trylock);
  16568. +
  16569. +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
  16570. +{
  16571. + struct rt_mutex *lock = &rwsem->lock;
  16572. +
  16573. + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
  16574. +
  16575. + if (rt_mutex_owner(lock) != current)
  16576. + rt_mutex_lock(&rwsem->lock);
  16577. + rwsem->read_depth++;
  16578. +}
  16579. +
  16580. +void rt_down_read(struct rw_semaphore *rwsem)
  16581. +{
  16582. + __rt_down_read(rwsem, 0);
  16583. +}
  16584. +EXPORT_SYMBOL(rt_down_read);
  16585. +
  16586. +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass)
  16587. +{
  16588. + __rt_down_read(rwsem, subclass);
  16589. +}
  16590. +EXPORT_SYMBOL(rt_down_read_nested);
  16591. +
  16592. +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name,
  16593. + struct lock_class_key *key)
  16594. +{
  16595. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16596. + /*
  16597. + * Make sure we are not reinitializing a held lock:
  16598. + */
  16599. + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
  16600. + lockdep_init_map(&rwsem->dep_map, name, key, 0);
  16601. +#endif
  16602. + rwsem->read_depth = 0;
  16603. + rwsem->lock.save_state = 0;
  16604. +}
  16605. +EXPORT_SYMBOL(__rt_rwsem_init);
  16606. +
  16607. +/**
  16608. + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
  16609. + * @cnt: the atomic which we are to dec
  16610. + * @lock: the mutex to return holding if we dec to 0
  16611. + *
  16612. + * return true and hold lock if we dec to 0, return false otherwise
  16613. + */
  16614. +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
  16615. +{
  16616. + /* dec if we can't possibly hit 0 */
  16617. + if (atomic_add_unless(cnt, -1, 1))
  16618. + return 0;
  16619. + /* we might hit 0, so take the lock */
  16620. + mutex_lock(lock);
  16621. + if (!atomic_dec_and_test(cnt)) {
  16622. + /* when we actually did the dec, we didn't hit 0 */
  16623. + mutex_unlock(lock);
  16624. + return 0;
  16625. + }
  16626. + /* we hit 0, and we hold the lock */
  16627. + return 1;
  16628. +}
  16629. +EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
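
The helper above is the classic "decrement, but take the lock before the decrement that could reach zero" pattern, typically used so that the final put of a refcounted object runs under the lock that protects its removal. A hedged userspace sketch of the same logic with C11 atomics and a pthread mutex (struct object and its fields are invented for the example):

#include <stdatomic.h>
#include <pthread.h>
#include <stdlib.h>

struct object {
        atomic_int refcnt;
        pthread_mutex_t lock;   /* protects whatever the final put must tear down */
};

static int dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
        int old = atomic_load(cnt);

        /* fast path: decrement freely while we cannot possibly hit zero */
        while (old > 1)
                if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                        return 0;

        /* we might hit zero, so take the lock before the final decrement */
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(cnt, 1) != 1) {
                pthread_mutex_unlock(lock);     /* somebody else still holds a ref */
                return 0;
        }
        return 1;       /* counter reached zero and we hold the lock */
}

static void put_object(struct object *obj)
{
        if (dec_and_lock(&obj->refcnt, &obj->lock)) {
                /* last reference: do the locked cleanup here ... */
                pthread_mutex_unlock(&obj->lock);
                free(obj);
        }
}

int main(void)
{
        struct object *obj = malloc(sizeof(*obj));

        atomic_init(&obj->refcnt, 2);
        pthread_mutex_init(&obj->lock, NULL);
        put_object(obj);        /* 2 -> 1 on the fast path */
        put_object(obj);        /* final put: takes the lock, then frees */
        return 0;
}
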
  16630. diff -Nur linux-4.1.39.orig/kernel/locking/rtmutex.c linux-4.1.39/kernel/locking/rtmutex.c
  16631. --- linux-4.1.39.orig/kernel/locking/rtmutex.c 2017-03-13 21:04:36.000000000 +0100
  16632. +++ linux-4.1.39/kernel/locking/rtmutex.c 2017-04-18 17:56:30.625397596 +0200
  16633. @@ -7,6 +7,11 @@
  16634. * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  16635. * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  16636. * Copyright (C) 2006 Esben Nielsen
  16637. + * Adaptive Spinlocks:
  16638. + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
  16639. + * and Peter Morreale,
  16640. + * Adaptive Spinlocks simplification:
  16641. + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
  16642. *
  16643. * See Documentation/locking/rt-mutex-design.txt for details.
  16644. */
  16645. @@ -16,6 +21,7 @@
  16646. #include <linux/sched/rt.h>
  16647. #include <linux/sched/deadline.h>
  16648. #include <linux/timer.h>
  16649. +#include <linux/ww_mutex.h>
  16650. #include "rtmutex_common.h"
  16651. @@ -69,6 +75,12 @@
  16652. clear_rt_mutex_waiters(lock);
  16653. }
  16654. +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
  16655. +{
  16656. + return waiter && waiter != PI_WAKEUP_INPROGRESS &&
  16657. + waiter != PI_REQUEUE_INPROGRESS;
  16658. +}
  16659. +
  16660. /*
  16661. * We can speed up the acquire/release, if the architecture
  16662. * supports cmpxchg and if there's no debugging state to be set up
  16663. @@ -300,7 +312,7 @@
  16664. * of task. We do not use the spin_xx_mutex() variants here as we are
  16665. * outside of the debug path.)
  16666. */
  16667. -static void rt_mutex_adjust_prio(struct task_struct *task)
  16668. +void rt_mutex_adjust_prio(struct task_struct *task)
  16669. {
  16670. unsigned long flags;
  16671. @@ -335,6 +347,14 @@
  16672. return debug_rt_mutex_detect_deadlock(waiter, chwalk);
  16673. }
  16674. +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
  16675. +{
  16676. + if (waiter->savestate)
  16677. + wake_up_lock_sleeper(waiter->task);
  16678. + else
  16679. + wake_up_process(waiter->task);
  16680. +}
  16681. +
  16682. /*
  16683. * Max number of times we'll walk the boosting chain:
  16684. */
  16685. @@ -342,7 +362,8 @@
  16686. static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  16687. {
  16688. - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
  16689. + return rt_mutex_real_waiter(p->pi_blocked_on) ?
  16690. + p->pi_blocked_on->lock : NULL;
  16691. }
  16692. /*
  16693. @@ -479,7 +500,7 @@
  16694. * reached or the state of the chain has changed while we
  16695. * dropped the locks.
  16696. */
  16697. - if (!waiter)
  16698. + if (!rt_mutex_real_waiter(waiter))
  16699. goto out_unlock_pi;
  16700. /*
  16701. @@ -641,13 +662,16 @@
  16702. * follow here. This is the end of the chain we are walking.
  16703. */
  16704. if (!rt_mutex_owner(lock)) {
  16705. + struct rt_mutex_waiter *lock_top_waiter;
  16706. +
  16707. /*
  16708. * If the requeue [7] above changed the top waiter,
  16709. * then we need to wake the new top waiter up to try
  16710. * to get the lock.
  16711. */
  16712. - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
  16713. - wake_up_process(rt_mutex_top_waiter(lock)->task);
  16714. + lock_top_waiter = rt_mutex_top_waiter(lock);
  16715. + if (prerequeue_top_waiter != lock_top_waiter)
  16716. + rt_mutex_wake_waiter(lock_top_waiter);
  16717. raw_spin_unlock(&lock->wait_lock);
  16718. return 0;
  16719. }
  16720. @@ -740,6 +764,25 @@
  16721. return ret;
  16722. }
  16723. +
  16724. +#define STEAL_NORMAL 0
  16725. +#define STEAL_LATERAL 1
  16726. +
  16727. +/*
  16728. + * Note that RT tasks are excluded from lateral-steals to prevent the
  16729. + * introduction of an unbounded latency
  16730. + */
  16731. +static inline int lock_is_stealable(struct task_struct *task,
  16732. + struct task_struct *pendowner, int mode)
  16733. +{
  16734. + if (mode == STEAL_NORMAL || rt_task(task)) {
  16735. + if (task->prio >= pendowner->prio)
  16736. + return 0;
  16737. + } else if (task->prio > pendowner->prio)
  16738. + return 0;
  16739. + return 1;
  16740. +}
  16741. +
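
Spelled out, the rule above is: a lateral steal (taking the lock from a pending owner of equal priority) is only allowed for non-RT tasks, while RT tasks must have strictly higher priority, which keeps their latency bounded. A standalone sketch of that decision, remembering that a lower numeric prio means a more important task (struct task and its rt flag are simplifications, not the kernel types):

#include <stdbool.h>
#include <stdio.h>

#define STEAL_NORMAL  0
#define STEAL_LATERAL 1

struct task { int prio; bool rt; };

static bool stealable(const struct task *task, const struct task *pendowner, int mode)
{
        if (mode == STEAL_NORMAL || task->rt)
                return task->prio < pendowner->prio;    /* strictly higher priority required */
        return task->prio <= pendowner->prio;           /* lateral: equal priority is enough */
}

int main(void)
{
        struct task normal = { .prio = 50, .rt = false };
        struct task rt     = { .prio = 50, .rt = true  };
        struct task owner  = { .prio = 50, .rt = true  };

        printf("non-RT lateral steal at equal prio: %d\n", stealable(&normal, &owner, STEAL_LATERAL)); /* 1 */
        printf("RT lateral steal at equal prio:     %d\n", stealable(&rt, &owner, STEAL_LATERAL));     /* 0 */
        return 0;
}
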
  16742. /*
  16743. * Try to take an rt-mutex
  16744. *
  16745. @@ -750,8 +793,9 @@
  16746. * @waiter: The waiter that is queued to the lock's wait list if the
  16747. * callsite called task_blocked_on_lock(), otherwise NULL
  16748. */
  16749. -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  16750. - struct rt_mutex_waiter *waiter)
  16751. +static int __try_to_take_rt_mutex(struct rt_mutex *lock,
  16752. + struct task_struct *task,
  16753. + struct rt_mutex_waiter *waiter, int mode)
  16754. {
  16755. unsigned long flags;
  16756. @@ -790,8 +834,10 @@
  16757. * If waiter is not the highest priority waiter of
  16758. * @lock, give up.
  16759. */
  16760. - if (waiter != rt_mutex_top_waiter(lock))
  16761. + if (waiter != rt_mutex_top_waiter(lock)) {
  16762. + /* XXX lock_is_stealable() ? */
  16763. return 0;
  16764. + }
  16765. /*
  16766. * We can acquire the lock. Remove the waiter from the
  16767. @@ -809,14 +855,10 @@
  16768. * not need to be dequeued.
  16769. */
  16770. if (rt_mutex_has_waiters(lock)) {
  16771. - /*
  16772. - * If @task->prio is greater than or equal to
  16773. - * the top waiter priority (kernel view),
  16774. - * @task lost.
  16775. - */
  16776. - if (task->prio >= rt_mutex_top_waiter(lock)->prio)
  16777. - return 0;
  16778. + struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
  16779. + if (task != pown && !lock_is_stealable(task, pown, mode))
  16780. + return 0;
  16781. /*
  16782. * The current top waiter stays enqueued. We
  16783. * don't have to change anything in the lock
  16784. @@ -865,6 +907,347 @@
  16785. return 1;
  16786. }
  16787. +#ifdef CONFIG_PREEMPT_RT_FULL
  16788. +/*
  16789. + * preemptible spin_lock functions:
  16790. + */
  16791. +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
  16792. + void (*slowfn)(struct rt_mutex *lock))
  16793. +{
  16794. + might_sleep_no_state_check();
  16795. +
  16796. + if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
  16797. + rt_mutex_deadlock_account_lock(lock, current);
  16798. + else
  16799. + slowfn(lock);
  16800. +}
  16801. +
  16802. +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
  16803. + void (*slowfn)(struct rt_mutex *lock))
  16804. +{
  16805. + if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
  16806. + rt_mutex_deadlock_account_unlock(current);
  16807. + else
  16808. + slowfn(lock);
  16809. +}
  16810. +#ifdef CONFIG_SMP
  16811. +/*
  16812. + * Note that owner is a speculative pointer and dereferencing relies
  16813. + * on rcu_read_lock() and the check against the lock owner.
  16814. + */
  16815. +static int adaptive_wait(struct rt_mutex *lock,
  16816. + struct task_struct *owner)
  16817. +{
  16818. + int res = 0;
  16819. +
  16820. + rcu_read_lock();
  16821. + for (;;) {
  16822. + if (owner != rt_mutex_owner(lock))
  16823. + break;
  16824. + /*
  16825. + * Ensure that owner->on_cpu is dereferenced _after_
  16826. + * checking the above to be valid.
  16827. + */
  16828. + barrier();
  16829. + if (!owner->on_cpu) {
  16830. + res = 1;
  16831. + break;
  16832. + }
  16833. + cpu_relax();
  16834. + }
  16835. + rcu_read_unlock();
  16836. + return res;
  16837. +}
  16838. +#else
  16839. +static int adaptive_wait(struct rt_mutex *lock,
  16840. + struct task_struct *orig_owner)
  16841. +{
  16842. + return 1;
  16843. +}
  16844. +#endif
  16845. +
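
adaptive_wait() above implements the usual adaptive-spin heuristic: keep spinning only while the lock is still held by the same owner and that owner is running on a CPU, and fall back to blocking as soon as it is not. A stripped-down, runnable sketch of that decision, with plain struct fields standing in for rt_mutex_owner() and owner->on_cpu and with the RCU and barrier details deliberately omitted:

#include <stdbool.h>
#include <stdio.h>

struct owner {
        bool on_cpu;            /* stand-in for task_struct::on_cpu */
};

struct lock {
        struct owner *owner;    /* stand-in for rt_mutex_owner() */
};

/* Return true when the caller should give up spinning and block instead. */
static bool adaptive_wait_sketch(struct lock *l, struct owner *o)
{
        for (;;) {
                if (l->owner != o)      /* ownership changed: go retry the lock */
                        return false;
                if (!o->on_cpu)         /* owner is not running: spinning is pointless */
                        return true;
                /* owner still runs on another CPU; keep spinning briefly */
        }
}

int main(void)
{
        struct owner o = { .on_cpu = false };
        struct lock l = { .owner = &o };

        printf("block instead of spin: %d\n", adaptive_wait_sketch(&l, &o));    /* 1 */
        return 0;
}
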
  16846. +# define pi_lock(lock) raw_spin_lock_irq(lock)
  16847. +# define pi_unlock(lock) raw_spin_unlock_irq(lock)
  16848. +
  16849. +static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  16850. + struct rt_mutex_waiter *waiter,
  16851. + struct task_struct *task,
  16852. + enum rtmutex_chainwalk chwalk);
  16853. +/*
  16854. + * Slow path lock function spin_lock style: this variant is very
  16855. + * careful not to miss any non-lock wakeups.
  16856. + *
  16857. + * We store the current state under p->pi_lock in p->saved_state and
  16858. + * the try_to_wake_up() code handles this accordingly.
  16859. + */
  16860. +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock)
  16861. +{
  16862. + struct task_struct *lock_owner, *self = current;
  16863. + struct rt_mutex_waiter waiter, *top_waiter;
  16864. + int ret;
  16865. +
  16866. + rt_mutex_init_waiter(&waiter, true);
  16867. +
  16868. + raw_spin_lock(&lock->wait_lock);
  16869. +
  16870. + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
  16871. + raw_spin_unlock(&lock->wait_lock);
  16872. + return;
  16873. + }
  16874. +
  16875. + BUG_ON(rt_mutex_owner(lock) == self);
  16876. +
  16877. + /*
  16878. + * We save whatever state the task is in and we'll restore it
  16879. + * after acquiring the lock taking real wakeups into account
  16880. + * as well. We are serialized via pi_lock against wakeups. See
  16881. + * try_to_wake_up().
  16882. + */
  16883. + pi_lock(&self->pi_lock);
  16884. + self->saved_state = self->state;
  16885. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  16886. + pi_unlock(&self->pi_lock);
  16887. +
  16888. + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
  16889. + BUG_ON(ret);
  16890. +
  16891. + for (;;) {
  16892. + /* Try to acquire the lock again. */
  16893. + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
  16894. + break;
  16895. +
  16896. + top_waiter = rt_mutex_top_waiter(lock);
  16897. + lock_owner = rt_mutex_owner(lock);
  16898. +
  16899. + raw_spin_unlock(&lock->wait_lock);
  16900. +
  16901. + debug_rt_mutex_print_deadlock(&waiter);
  16902. +
  16903. + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
  16904. + schedule_rt_mutex(lock);
  16905. +
  16906. + raw_spin_lock(&lock->wait_lock);
  16907. +
  16908. + pi_lock(&self->pi_lock);
  16909. + __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
  16910. + pi_unlock(&self->pi_lock);
  16911. + }
  16912. +
  16913. + /*
  16914. + * Restore the task state to current->saved_state. We set it
  16915. + * to the original state above and the try_to_wake_up() code
  16916. + * has possibly updated it when a real (non-rtmutex) wakeup
  16917. + * happened while we were blocked. Clear saved_state so
  16918. + * try_to_wakeup() does not get confused.
  16919. + */
  16920. + pi_lock(&self->pi_lock);
  16921. + __set_current_state_no_track(self->saved_state);
  16922. + self->saved_state = TASK_RUNNING;
  16923. + pi_unlock(&self->pi_lock);
  16924. +
  16925. + /*
  16926. + * try_to_take_rt_mutex() sets the waiter bit
  16927. + * unconditionally. We might have to fix that up:
  16928. + */
  16929. + fixup_rt_mutex_waiters(lock);
  16930. +
  16931. + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
  16932. + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
  16933. +
  16934. + raw_spin_unlock(&lock->wait_lock);
  16935. +
  16936. + debug_rt_mutex_free_waiter(&waiter);
  16937. +}
  16938. +
  16939. +static void wakeup_next_waiter(struct rt_mutex *lock);
  16940. +/*
  16941. + * Slow path to release a rt_mutex spin_lock style
  16942. + */
  16943. +static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
  16944. +{
  16945. + raw_spin_lock(&lock->wait_lock);
  16946. +
  16947. + debug_rt_mutex_unlock(lock);
  16948. +
  16949. + rt_mutex_deadlock_account_unlock(current);
  16950. +
  16951. + if (!rt_mutex_has_waiters(lock)) {
  16952. + lock->owner = NULL;
  16953. + raw_spin_unlock(&lock->wait_lock);
  16954. + return;
  16955. + }
  16956. +
  16957. + wakeup_next_waiter(lock);
  16958. +
  16959. + raw_spin_unlock(&lock->wait_lock);
  16960. +
  16961. + /* Undo pi boosting when necessary */
  16962. + rt_mutex_adjust_prio(current);
  16963. +}
  16964. +
  16965. +void __lockfunc rt_spin_lock(spinlock_t *lock)
  16966. +{
  16967. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
  16968. + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
  16969. +}
  16970. +EXPORT_SYMBOL(rt_spin_lock);
  16971. +
  16972. +void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
  16973. +{
  16974. + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
  16975. +}
  16976. +EXPORT_SYMBOL(__rt_spin_lock);
  16977. +
  16978. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  16979. +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
  16980. +{
  16981. + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
  16982. + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
  16983. +}
  16984. +EXPORT_SYMBOL(rt_spin_lock_nested);
  16985. +#endif
  16986. +
  16987. +void __lockfunc rt_spin_unlock(spinlock_t *lock)
  16988. +{
  16989. + /* NOTE: we always pass in '1' for nested, for simplicity */
  16990. + spin_release(&lock->dep_map, 1, _RET_IP_);
  16991. + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
  16992. +}
  16993. +EXPORT_SYMBOL(rt_spin_unlock);
  16994. +
  16995. +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
  16996. +{
  16997. + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
  16998. +}
  16999. +EXPORT_SYMBOL(__rt_spin_unlock);
  17000. +
  17001. +/*
  17002. + * Wait for the lock to get unlocked: instead of polling for an unlock
  17003. + * (like raw spinlocks do), we lock and unlock, to force the kernel to
  17004. + * schedule if there's contention:
  17005. + */
  17006. +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
  17007. +{
  17008. + spin_lock(lock);
  17009. + spin_unlock(lock);
  17010. +}
  17011. +EXPORT_SYMBOL(rt_spin_unlock_wait);
  17012. +
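
rt_spin_unlock_wait() above waits for any current holder simply by acquiring and immediately releasing the lock. The same trick in userspace, for reference (a sketch, not the kernel code):

#include <pthread.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

static void unlock_wait(void)
{
        pthread_mutex_lock(&m);         /* blocks until the current holder, if any, is done */
        pthread_mutex_unlock(&m);       /* we never needed the lock itself */
}

int main(void)
{
        unlock_wait();  /* nobody holds the lock here, so this returns immediately */
        return 0;
}
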
  17013. +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
  17014. +{
  17015. + return rt_mutex_trylock(lock);
  17016. +}
  17017. +
  17018. +int __lockfunc rt_spin_trylock(spinlock_t *lock)
  17019. +{
  17020. + int ret = rt_mutex_trylock(&lock->lock);
  17021. +
  17022. + if (ret)
  17023. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  17024. + return ret;
  17025. +}
  17026. +EXPORT_SYMBOL(rt_spin_trylock);
  17027. +
  17028. +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
  17029. +{
  17030. + int ret;
  17031. +
  17032. + local_bh_disable();
  17033. + ret = rt_mutex_trylock(&lock->lock);
  17034. + if (ret) {
  17035. + migrate_disable();
  17036. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  17037. + } else
  17038. + local_bh_enable();
  17039. + return ret;
  17040. +}
  17041. +EXPORT_SYMBOL(rt_spin_trylock_bh);
  17042. +
  17043. +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
  17044. +{
  17045. + int ret;
  17046. +
  17047. + *flags = 0;
  17048. + ret = rt_mutex_trylock(&lock->lock);
  17049. + if (ret) {
  17050. + migrate_disable();
  17051. + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
  17052. + }
  17053. + return ret;
  17054. +}
  17055. +EXPORT_SYMBOL(rt_spin_trylock_irqsave);
  17056. +
  17057. +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
  17058. +{
  17059. + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
  17060. + if (atomic_add_unless(atomic, -1, 1))
  17061. + return 0;
  17062. + migrate_disable();
  17063. + rt_spin_lock(lock);
  17064. + if (atomic_dec_and_test(atomic))
  17065. + return 1;
  17066. + rt_spin_unlock(lock);
  17067. + migrate_enable();
  17068. + return 0;
  17069. +}
  17070. +EXPORT_SYMBOL(atomic_dec_and_spin_lock);
  17071. +
  17072. + void
  17073. +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
  17074. +{
  17075. +#ifdef CONFIG_DEBUG_LOCK_ALLOC
  17076. + /*
  17077. + * Make sure we are not reinitializing a held lock:
  17078. + */
  17079. + debug_check_no_locks_freed((void *)lock, sizeof(*lock));
  17080. + lockdep_init_map(&lock->dep_map, name, key, 0);
  17081. +#endif
  17082. +}
  17083. +EXPORT_SYMBOL(__rt_spin_lock_init);
  17084. +
  17085. +#endif /* PREEMPT_RT_FULL */
  17086. +
  17087. +#ifdef CONFIG_PREEMPT_RT_FULL
  17088. + static inline int __sched
  17089. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  17090. +{
  17091. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  17092. + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
  17093. +
  17094. + if (!hold_ctx)
  17095. + return 0;
  17096. +
  17097. + if (unlikely(ctx == hold_ctx))
  17098. + return -EALREADY;
  17099. +
  17100. + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
  17101. + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
  17102. +#ifdef CONFIG_DEBUG_MUTEXES
  17103. + DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
  17104. + ctx->contending_lock = ww;
  17105. +#endif
  17106. + return -EDEADLK;
  17107. + }
  17108. +
  17109. + return 0;
  17110. +}
  17111. +#else
  17112. + static inline int __sched
  17113. +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
  17114. +{
  17115. + BUG();
  17116. + return 0;
  17117. +}
  17118. +
  17119. +#endif
  17120. +
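
The stamp check above enforces the ww_mutex acquire ordering on RT: a context that is "younger" (larger stamp) than the one holding the lock backs off with -EDEADLK, and a context that runs into its own lock gets -EALREADY. A simplified userspace sketch of that comparison; the wraparound-safe stamp arithmetic and the pointer tie-break from the real check are intentionally left out:

#include <errno.h>
#include <stdio.h>

struct acquire_ctx { unsigned long stamp; };

static int check_stamp(const struct acquire_ctx *me, const struct acquire_ctx *holder)
{
        if (!holder)
                return 0;               /* the holder is not part of a ww transaction */
        if (me == holder)
                return -EALREADY;       /* we already own this lock in this transaction */
        if (me->stamp > holder->stamp)
                return -EDEADLK;        /* we are younger: back off and retry later */
        return 0;                       /* we are older: allowed to keep waiting */
}

int main(void)
{
        struct acquire_ctx older = { .stamp = 1 }, younger = { .stamp = 2 };

        printf("%d %d\n", check_stamp(&younger, &older),        /* -EDEADLK */
                          check_stamp(&older, &younger));       /* 0 */
        return 0;
}
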
  17121. +static inline int
  17122. +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
  17123. + struct rt_mutex_waiter *waiter)
  17124. +{
  17125. + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
  17126. +}
  17127. +
  17128. /*
  17129. * Task blocks on lock.
  17130. *
  17131. @@ -896,6 +1279,23 @@
  17132. return -EDEADLK;
  17133. raw_spin_lock_irqsave(&task->pi_lock, flags);
  17134. +
  17135. + /*
  17136. + * In the case of futex requeue PI, this will be a proxy
  17137. + * lock. The task will wake unaware that it is enqueued on
  17138. + * this lock. Avoid blocking on two locks and corrupting
  17139. + * pi_blocked_on via the PI_WAKEUP_INPROGRESS
  17140. + * flag. futex_wait_requeue_pi() sets this when it wakes up
  17141. + * before requeue (due to a signal or timeout). Do not enqueue
  17142. + * the task if PI_WAKEUP_INPROGRESS is set.
  17143. + */
  17144. + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
  17145. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17146. + return -EAGAIN;
  17147. + }
  17148. +
  17149. + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
  17150. +
  17151. __rt_mutex_adjust_prio(task);
  17152. waiter->task = task;
  17153. waiter->lock = lock;
  17154. @@ -919,7 +1319,7 @@
  17155. rt_mutex_enqueue_pi(owner, waiter);
  17156. __rt_mutex_adjust_prio(owner);
  17157. - if (owner->pi_blocked_on)
  17158. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  17159. chain_walk = 1;
  17160. } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
  17161. chain_walk = 1;
  17162. @@ -957,8 +1357,9 @@
  17163. /*
  17164. * Wake up the next waiter on the lock.
  17165. *
  17166. - * Remove the top waiter from the current tasks pi waiter list and
  17167. - * wake it up.
  17168. + * Remove the top waiter from the current task's pi waiter list,
  17169. + * wake it up and return whether the current task needs to undo
  17170. + * a potential priority boosting.
  17171. *
  17172. * Called with lock->wait_lock held.
  17173. */
  17174. @@ -996,7 +1397,7 @@
  17175. * long as we hold lock->wait_lock. The waiter task needs to
  17176. * acquire it in order to dequeue the waiter.
  17177. */
  17178. - wake_up_process(waiter->task);
  17179. + rt_mutex_wake_waiter(waiter);
  17180. }
  17181. /*
  17182. @@ -1010,7 +1411,7 @@
  17183. {
  17184. bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
  17185. struct task_struct *owner = rt_mutex_owner(lock);
  17186. - struct rt_mutex *next_lock;
  17187. + struct rt_mutex *next_lock = NULL;
  17188. unsigned long flags;
  17189. raw_spin_lock_irqsave(&current->pi_lock, flags);
  17190. @@ -1035,7 +1436,8 @@
  17191. __rt_mutex_adjust_prio(owner);
  17192. /* Store the lock on which owner is blocked or NULL */
  17193. - next_lock = task_blocked_on_lock(owner);
  17194. + if (rt_mutex_real_waiter(owner->pi_blocked_on))
  17195. + next_lock = task_blocked_on_lock(owner);
  17196. raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
  17197. @@ -1071,17 +1473,17 @@
  17198. raw_spin_lock_irqsave(&task->pi_lock, flags);
  17199. waiter = task->pi_blocked_on;
  17200. - if (!waiter || (waiter->prio == task->prio &&
  17201. + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
  17202. !dl_prio(task->prio))) {
  17203. raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17204. return;
  17205. }
  17206. next_lock = waiter->lock;
  17207. - raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17208. /* gets dropped in rt_mutex_adjust_prio_chain()! */
  17209. get_task_struct(task);
  17210. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  17211. rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
  17212. next_lock, NULL, task);
  17213. }
  17214. @@ -1099,7 +1501,8 @@
  17215. static int __sched
  17216. __rt_mutex_slowlock(struct rt_mutex *lock, int state,
  17217. struct hrtimer_sleeper *timeout,
  17218. - struct rt_mutex_waiter *waiter)
  17219. + struct rt_mutex_waiter *waiter,
  17220. + struct ww_acquire_ctx *ww_ctx)
  17221. {
  17222. int ret = 0;
  17223. @@ -1122,6 +1525,12 @@
  17224. break;
  17225. }
  17226. + if (ww_ctx && ww_ctx->acquired > 0) {
  17227. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  17228. + if (ret)
  17229. + break;
  17230. + }
  17231. +
  17232. raw_spin_unlock(&lock->wait_lock);
  17233. debug_rt_mutex_print_deadlock(waiter);
  17234. @@ -1156,25 +1565,102 @@
  17235. }
  17236. }
  17237. +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
  17238. + struct ww_acquire_ctx *ww_ctx)
  17239. +{
  17240. +#ifdef CONFIG_DEBUG_MUTEXES
  17241. + /*
  17242. + * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
  17243. + * but released with a normal mutex_unlock in this call.
  17244. + *
  17245. + * This should never happen, always use ww_mutex_unlock.
  17246. + */
  17247. + DEBUG_LOCKS_WARN_ON(ww->ctx);
  17248. +
  17249. + /*
  17250. + * Not quite done after calling ww_acquire_done() ?
  17251. + */
  17252. + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
  17253. +
  17254. + if (ww_ctx->contending_lock) {
  17255. + /*
  17256. + * After -EDEADLK you tried to
  17257. + * acquire a different ww_mutex? Bad!
  17258. + */
  17259. + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
  17260. +
  17261. + /*
  17262. + * You called ww_mutex_lock after receiving -EDEADLK,
  17263. + * but 'forgot' to unlock everything else first?
  17264. + */
  17265. + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
  17266. + ww_ctx->contending_lock = NULL;
  17267. + }
  17268. +
  17269. + /*
  17270. + * Naughty, using a different class will lead to undefined behavior!
  17271. + */
  17272. + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
  17273. +#endif
  17274. + ww_ctx->acquired++;
  17275. +}
  17276. +
  17277. +#ifdef CONFIG_PREEMPT_RT_FULL
  17278. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  17279. + struct ww_acquire_ctx *ww_ctx)
  17280. +{
  17281. + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
  17282. + struct rt_mutex_waiter *waiter, *n;
  17283. +
  17284. + /*
  17285. + * This branch gets optimized out for the common case,
  17286. + * and is only important for ww_mutex_lock.
  17287. + */
  17288. + ww_mutex_lock_acquired(ww, ww_ctx);
  17289. + ww->ctx = ww_ctx;
  17290. +
  17291. + /*
  17292. + * Give any possible sleeping processes the chance to wake up,
  17293. + * so they can recheck if they have to back off.
  17294. + */
  17295. + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
  17296. + tree_entry) {
  17297. + /* XXX debug rt mutex waiter wakeup */
  17298. +
  17299. + BUG_ON(waiter->lock != lock);
  17300. + rt_mutex_wake_waiter(waiter);
  17301. + }
  17302. +}
  17303. +
  17304. +#else
  17305. +
  17306. +static void ww_mutex_account_lock(struct rt_mutex *lock,
  17307. + struct ww_acquire_ctx *ww_ctx)
  17308. +{
  17309. + BUG();
  17310. +}
  17311. +#endif
  17312. +
  17313. /*
  17314. * Slow path lock function:
  17315. */
  17316. static int __sched
  17317. rt_mutex_slowlock(struct rt_mutex *lock, int state,
  17318. struct hrtimer_sleeper *timeout,
  17319. - enum rtmutex_chainwalk chwalk)
  17320. + enum rtmutex_chainwalk chwalk,
  17321. + struct ww_acquire_ctx *ww_ctx)
  17322. {
  17323. struct rt_mutex_waiter waiter;
  17324. int ret = 0;
  17325. - debug_rt_mutex_init_waiter(&waiter);
  17326. - RB_CLEAR_NODE(&waiter.pi_tree_entry);
  17327. - RB_CLEAR_NODE(&waiter.tree_entry);
  17328. + rt_mutex_init_waiter(&waiter, false);
  17329. raw_spin_lock(&lock->wait_lock);
  17330. /* Try to acquire the lock again: */
  17331. if (try_to_take_rt_mutex(lock, current, NULL)) {
  17332. + if (ww_ctx)
  17333. + ww_mutex_account_lock(lock, ww_ctx);
  17334. raw_spin_unlock(&lock->wait_lock);
  17335. return 0;
  17336. }
  17337. @@ -1192,13 +1678,23 @@
  17338. if (likely(!ret))
  17339. /* sleep on the mutex */
  17340. - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
  17341. + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
  17342. + ww_ctx);
  17343. + else if (ww_ctx) {
  17344. + /* ww_mutex received EDEADLK, let it become EALREADY */
  17345. + ret = __mutex_lock_check_stamp(lock, ww_ctx);
  17346. + BUG_ON(!ret);
  17347. + }
  17348. if (unlikely(ret)) {
  17349. __set_current_state(TASK_RUNNING);
  17350. if (rt_mutex_has_waiters(lock))
  17351. remove_waiter(lock, &waiter);
  17352. - rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  17353. + /* ww_mutex wants to report EDEADLK/EALREADY, let it */
  17354. + if (!ww_ctx)
  17355. + rt_mutex_handle_deadlock(ret, chwalk, &waiter);
  17356. + } else if (ww_ctx) {
  17357. + ww_mutex_account_lock(lock, ww_ctx);
  17358. }
  17359. /*
  17360. @@ -1255,7 +1751,7 @@
  17361. /*
  17362. * Slow path to release a rt-mutex:
  17363. */
  17364. -static void __sched
  17365. +static bool __sched
  17366. rt_mutex_slowunlock(struct rt_mutex *lock)
  17367. {
  17368. raw_spin_lock(&lock->wait_lock);
  17369. @@ -1298,7 +1794,7 @@
  17370. while (!rt_mutex_has_waiters(lock)) {
  17371. /* Drops lock->wait_lock ! */
  17372. if (unlock_rt_mutex_safe(lock) == true)
  17373. - return;
  17374. + return false;
  17375. /* Relock the rtmutex and try again */
  17376. raw_spin_lock(&lock->wait_lock);
  17377. }
  17378. @@ -1311,8 +1807,7 @@
  17379. raw_spin_unlock(&lock->wait_lock);
  17380. - /* Undo pi boosting if necessary: */
  17381. - rt_mutex_adjust_prio(current);
  17382. + return true;
  17383. }
  17384. /*
  17385. @@ -1323,31 +1818,36 @@
  17386. */
  17387. static inline int
  17388. rt_mutex_fastlock(struct rt_mutex *lock, int state,
  17389. + struct ww_acquire_ctx *ww_ctx,
  17390. int (*slowfn)(struct rt_mutex *lock, int state,
  17391. struct hrtimer_sleeper *timeout,
  17392. - enum rtmutex_chainwalk chwalk))
  17393. + enum rtmutex_chainwalk chwalk,
  17394. + struct ww_acquire_ctx *ww_ctx))
  17395. {
  17396. if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
  17397. rt_mutex_deadlock_account_lock(lock, current);
  17398. return 0;
  17399. } else
  17400. - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
  17401. + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
  17402. + ww_ctx);
  17403. }
  17404. static inline int
  17405. rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
  17406. struct hrtimer_sleeper *timeout,
  17407. enum rtmutex_chainwalk chwalk,
  17408. + struct ww_acquire_ctx *ww_ctx,
  17409. int (*slowfn)(struct rt_mutex *lock, int state,
  17410. struct hrtimer_sleeper *timeout,
  17411. - enum rtmutex_chainwalk chwalk))
  17412. + enum rtmutex_chainwalk chwalk,
  17413. + struct ww_acquire_ctx *ww_ctx))
  17414. {
  17415. if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
  17416. likely(rt_mutex_cmpxchg(lock, NULL, current))) {
  17417. rt_mutex_deadlock_account_lock(lock, current);
  17418. return 0;
  17419. } else
  17420. - return slowfn(lock, state, timeout, chwalk);
  17421. + return slowfn(lock, state, timeout, chwalk, ww_ctx);
  17422. }
  17423. static inline int
  17424. @@ -1363,12 +1863,14 @@
  17425. static inline void
  17426. rt_mutex_fastunlock(struct rt_mutex *lock,
  17427. - void (*slowfn)(struct rt_mutex *lock))
  17428. + bool (*slowfn)(struct rt_mutex *lock))
  17429. {
  17430. - if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
  17431. + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
  17432. rt_mutex_deadlock_account_unlock(current);
  17433. - else
  17434. - slowfn(lock);
  17435. + } else if (slowfn(lock)) {
  17436. + /* Undo pi boosting if necessary: */
  17437. + rt_mutex_adjust_prio(current);
  17438. + }
  17439. }
  17440. /**
  17441. @@ -1380,7 +1882,7 @@
  17442. {
  17443. might_sleep();
  17444. - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
  17445. + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
  17446. }
  17447. EXPORT_SYMBOL_GPL(rt_mutex_lock);
  17448. @@ -1397,7 +1899,7 @@
  17449. {
  17450. might_sleep();
  17451. - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
  17452. + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
  17453. }
  17454. EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
  17455. @@ -1410,11 +1912,30 @@
  17456. might_sleep();
  17457. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  17458. - RT_MUTEX_FULL_CHAINWALK,
  17459. + RT_MUTEX_FULL_CHAINWALK, NULL,
  17460. rt_mutex_slowlock);
  17461. }
  17462. /**
  17463. + * rt_mutex_lock_killable - lock a rt_mutex killable
  17464. + *
  17465. + * @lock: the rt_mutex to be locked
  17466. + * @detect_deadlock: deadlock detection on/off
  17467. + *
  17468. + * Returns:
  17469. + * 0 on success
  17470. + * -EINTR when interrupted by a signal
  17471. + * -EDEADLK when the lock would deadlock (when deadlock detection is on)
  17472. + */
  17473. +int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
  17474. +{
  17475. + might_sleep();
  17476. +
  17477. + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
  17478. +}
  17479. +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
  17480. +
  17481. +/**
  17482. * rt_mutex_timed_lock - lock a rt_mutex interruptible
  17483. * the timeout structure is provided
  17484. * by the caller
  17485. @@ -1434,6 +1955,7 @@
  17486. return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
  17487. RT_MUTEX_MIN_CHAINWALK,
  17488. + NULL,
  17489. rt_mutex_slowlock);
  17490. }
  17491. EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
  17492. @@ -1463,6 +1985,22 @@
  17493. EXPORT_SYMBOL_GPL(rt_mutex_unlock);
  17494. /**
  17495. + * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
  17496. + * @lock: the rt_mutex to be unlocked
  17497. + *
  17498. + * Returns: true/false indicating whether priority adjustment is
  17499. + * required or not.
  17500. + */
  17501. +bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
  17502. +{
  17503. + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
  17504. + rt_mutex_deadlock_account_unlock(current);
  17505. + return false;
  17506. + }
  17507. + return rt_mutex_slowunlock(lock);
  17508. +}
  17509. +
  17510. +/**
  17511. * rt_mutex_destroy - mark a mutex unusable
  17512. * @lock: the mutex to be destroyed
  17513. *
  17514. @@ -1492,13 +2030,12 @@
  17515. void __rt_mutex_init(struct rt_mutex *lock, const char *name)
  17516. {
  17517. lock->owner = NULL;
  17518. - raw_spin_lock_init(&lock->wait_lock);
  17519. lock->waiters = RB_ROOT;
  17520. lock->waiters_leftmost = NULL;
  17521. debug_rt_mutex_init(lock, name);
  17522. }
  17523. -EXPORT_SYMBOL_GPL(__rt_mutex_init);
  17524. +EXPORT_SYMBOL(__rt_mutex_init);
  17525. /**
  17526. * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
  17527. @@ -1513,7 +2050,7 @@
  17528. void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  17529. struct task_struct *proxy_owner)
  17530. {
  17531. - __rt_mutex_init(lock, NULL);
  17532. + rt_mutex_init(lock);
  17533. debug_rt_mutex_proxy_lock(lock, proxy_owner);
  17534. rt_mutex_set_owner(lock, proxy_owner);
  17535. rt_mutex_deadlock_account_lock(lock, proxy_owner);
  17536. @@ -1561,6 +2098,35 @@
  17537. return 1;
  17538. }
  17539. +#ifdef CONFIG_PREEMPT_RT_FULL
  17540. + /*
  17541. + * In PREEMPT_RT there's an added race.
  17542. + * If the task that we are about to requeue times out,
  17543. + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
  17544. + * to skip this task. But right after the task sets
  17545. + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
  17546. + * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
  17547. + * This will replace the PI_WAKEUP_INPROGRESS with the actual
  17548. + * lock that it blocks on. We *must not* place this task
  17549. + * on this proxy lock in that case.
  17550. + *
  17551. + * To prevent this race, we first take the task's pi_lock
  17552. + * and check if it has updated its pi_blocked_on. If it has,
  17553. + * we assume that it woke up and we return -EAGAIN.
  17554. + * Otherwise, we set the task's pi_blocked_on to
  17555. + * PI_REQUEUE_INPROGRESS, so that if the task is waking up
  17556. + * it will know that we are in the process of requeuing it.
  17557. + */
  17558. + raw_spin_lock_irq(&task->pi_lock);
  17559. + if (task->pi_blocked_on) {
  17560. + raw_spin_unlock_irq(&task->pi_lock);
  17561. + raw_spin_unlock(&lock->wait_lock);
  17562. + return -EAGAIN;
  17563. + }
  17564. + task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
  17565. + raw_spin_unlock_irq(&task->pi_lock);
  17566. +#endif
  17567. +
  17568. /* We enforce deadlock detection for futexes */
  17569. ret = task_blocks_on_rt_mutex(lock, waiter, task,
  17570. RT_MUTEX_FULL_CHAINWALK);
  17571. @@ -1575,7 +2141,7 @@
  17572. ret = 0;
  17573. }
  17574. - if (unlikely(ret))
  17575. + if (ret && rt_mutex_has_waiters(lock))
  17576. remove_waiter(lock, waiter);
  17577. raw_spin_unlock(&lock->wait_lock);
  17578. @@ -1631,7 +2197,7 @@
  17579. set_current_state(TASK_INTERRUPTIBLE);
  17580. /* sleep on the mutex */
  17581. - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
  17582. + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
  17583. if (unlikely(ret))
  17584. remove_waiter(lock, waiter);
  17585. @@ -1646,3 +2212,89 @@
  17586. return ret;
  17587. }
  17588. +
  17589. +static inline int
  17590. +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  17591. +{
  17592. +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
  17593. + unsigned tmp;
  17594. +
  17595. + if (ctx->deadlock_inject_countdown-- == 0) {
  17596. + tmp = ctx->deadlock_inject_interval;
  17597. + if (tmp > UINT_MAX/4)
  17598. + tmp = UINT_MAX;
  17599. + else
  17600. + tmp = tmp*2 + tmp + tmp/2;
  17601. +
  17602. + ctx->deadlock_inject_interval = tmp;
  17603. + ctx->deadlock_inject_countdown = tmp;
  17604. + ctx->contending_lock = lock;
  17605. +
  17606. + ww_mutex_unlock(lock);
  17607. +
  17608. + return -EDEADLK;
  17609. + }
  17610. +#endif
  17611. +
  17612. + return 0;
  17613. +}
  17614. +
  17615. +#ifdef CONFIG_PREEMPT_RT_FULL
  17616. +int __sched
  17617. +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  17618. +{
  17619. + int ret;
  17620. +
  17621. + might_sleep();
  17622. +
  17623. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  17624. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
  17625. + if (ret)
  17626. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  17627. + else if (!ret && ww_ctx->acquired > 1)
  17628. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  17629. +
  17630. + return ret;
  17631. +}
  17632. +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
  17633. +
  17634. +int __sched
  17635. +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
  17636. +{
  17637. + int ret;
  17638. +
  17639. + might_sleep();
  17640. +
  17641. + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
  17642. + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
  17643. + if (ret)
  17644. + mutex_release(&lock->base.dep_map, 1, _RET_IP_);
  17645. + else if (!ret && ww_ctx->acquired > 1)
  17646. + return ww_mutex_deadlock_injection(lock, ww_ctx);
  17647. +
  17648. + return ret;
  17649. +}
  17650. +EXPORT_SYMBOL_GPL(__ww_mutex_lock);
  17651. +
  17652. +void __sched ww_mutex_unlock(struct ww_mutex *lock)
  17653. +{
  17654. + int nest = !!lock->ctx;
  17655. +
  17656. + /*
  17657. + * The unlocking fastpath is the 0->1 transition from 'locked'
  17658. + * into 'unlocked' state:
  17659. + */
  17660. + if (nest) {
  17661. +#ifdef CONFIG_DEBUG_MUTEXES
  17662. + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
  17663. +#endif
  17664. + if (lock->ctx->acquired > 0)
  17665. + lock->ctx->acquired--;
  17666. + lock->ctx = NULL;
  17667. + }
  17668. +
  17669. + mutex_release(&lock->base.dep_map, nest, _RET_IP_);
  17670. + rt_mutex_unlock(&lock->base.lock);
  17671. +}
  17672. +EXPORT_SYMBOL(ww_mutex_unlock);
  17673. +#endif
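
Callers of the ww_mutex functions above are expected to treat -EDEADLK as "drop every lock you hold and retry, starting with the contended one". A userspace analogue of that backoff loop using two pthread mutexes and trylock in place of the stamp mechanism (purely illustrative; the real API keeps ordering via the acquire context, not via trylock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

static void lock_both(pthread_mutex_t *first, pthread_mutex_t *second)
{
        for (;;) {
                pthread_mutex_lock(first);
                if (pthread_mutex_trylock(second) == 0)
                        return;                 /* got both, done */
                /*
                 * Contended: back off completely, then start over with the
                 * contended lock first (ww_mutex_lock_slow() plays this role).
                 */
                pthread_mutex_unlock(first);
                pthread_mutex_t *tmp = first;
                first = second;
                second = tmp;
        }
}

int main(void)
{
        lock_both(&a, &b);
        puts("locked a and b");
        pthread_mutex_unlock(&a);
        pthread_mutex_unlock(&b);
        return 0;
}
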
  17674. diff -Nur linux-4.1.39.orig/kernel/locking/rtmutex_common.h linux-4.1.39/kernel/locking/rtmutex_common.h
  17675. --- linux-4.1.39.orig/kernel/locking/rtmutex_common.h 2017-03-13 21:04:36.000000000 +0100
  17676. +++ linux-4.1.39/kernel/locking/rtmutex_common.h 2017-04-18 17:56:30.625397596 +0200
  17677. @@ -49,6 +49,7 @@
  17678. struct rb_node pi_tree_entry;
  17679. struct task_struct *task;
  17680. struct rt_mutex *lock;
  17681. + bool savestate;
  17682. #ifdef CONFIG_DEBUG_RT_MUTEXES
  17683. unsigned long ip;
  17684. struct pid *deadlock_task_pid;
  17685. @@ -119,6 +120,9 @@
  17686. /*
  17687. * PI-futex support (proxy locking functions, etc.):
  17688. */
  17689. +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
  17690. +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
  17691. +
  17692. extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
  17693. extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  17694. struct task_struct *proxy_owner);
  17695. @@ -132,10 +136,24 @@
  17696. struct rt_mutex_waiter *waiter);
  17697. extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
  17698. +extern bool rt_mutex_futex_unlock(struct rt_mutex *lock);
  17699. +
  17700. +extern void rt_mutex_adjust_prio(struct task_struct *task);
  17701. +
  17702. #ifdef CONFIG_DEBUG_RT_MUTEXES
  17703. # include "rtmutex-debug.h"
  17704. #else
  17705. # include "rtmutex.h"
  17706. #endif
  17707. +static inline void
  17708. +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate)
  17709. +{
  17710. + debug_rt_mutex_init_waiter(waiter);
  17711. + waiter->task = NULL;
  17712. + waiter->savestate = savestate;
  17713. + RB_CLEAR_NODE(&waiter->pi_tree_entry);
  17714. + RB_CLEAR_NODE(&waiter->tree_entry);
  17715. +}
  17716. +
  17717. #endif
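
Two details of the rtmutex_common.h change are worth spelling out: rt_mutex_init_waiter() now records whether the waiter should save/restore the task state (the RT "sleeping spinlock" case), and PI_WAKEUP_INPROGRESS / PI_REQUEUE_INPROGRESS encode transient states in a field that normally holds a waiter pointer by casting small integers that can never alias a real allocation. The snippet below is a hedged, self-contained illustration of that sentinel-pointer idiom only; the names and struct are invented for the example.

#include <stdio.h>
#include <stdbool.h>

struct waiter {
	const char *owner_name;
};

/*
 * Sentinel "pointers": small integer constants cast to the pointer type.
 * They cannot collide with a real struct waiter, so a field that usually
 * holds a waiter pointer can also encode in-progress states, as with
 * PI_WAKEUP_INPROGRESS / PI_REQUEUE_INPROGRESS above.
 */
#define WAKEUP_INPROGRESS  ((struct waiter *) 1)
#define REQUEUE_INPROGRESS ((struct waiter *) 2)

static bool is_sentinel(const struct waiter *w)
{
	return w == WAKEUP_INPROGRESS || w == REQUEUE_INPROGRESS;
}

static void report(const struct waiter *w)
{
	if (!w)
		printf("no waiter\n");
	else if (is_sentinel(w))
		printf("transient state %p - do not dereference\n", (const void *)w);
	else
		printf("real waiter owned by %s\n", w->owner_name);
}

int main(void)
{
	struct waiter real = { .owner_name = "task A" };

	report(NULL);
	report(WAKEUP_INPROGRESS);
	report(&real);
	return 0;
}

Every consumer of such a field must test for the sentinels before dereferencing, which is exactly what the futex/rtmutex code using these macros has to do.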
  17718. diff -Nur linux-4.1.39.orig/kernel/locking/spinlock.c linux-4.1.39/kernel/locking/spinlock.c
  17719. --- linux-4.1.39.orig/kernel/locking/spinlock.c 2017-03-13 21:04:36.000000000 +0100
  17720. +++ linux-4.1.39/kernel/locking/spinlock.c 2017-04-18 17:56:30.625397596 +0200
  17721. @@ -124,8 +124,11 @@
  17722. * __[spin|read|write]_lock_bh()
  17723. */
  17724. BUILD_LOCK_OPS(spin, raw_spinlock);
  17725. +
  17726. +#ifndef CONFIG_PREEMPT_RT_FULL
  17727. BUILD_LOCK_OPS(read, rwlock);
  17728. BUILD_LOCK_OPS(write, rwlock);
  17729. +#endif
  17730. #endif
  17731. @@ -209,6 +212,8 @@
  17732. EXPORT_SYMBOL(_raw_spin_unlock_bh);
  17733. #endif
  17734. +#ifndef CONFIG_PREEMPT_RT_FULL
  17735. +
  17736. #ifndef CONFIG_INLINE_READ_TRYLOCK
  17737. int __lockfunc _raw_read_trylock(rwlock_t *lock)
  17738. {
  17739. @@ -353,6 +358,8 @@
  17740. EXPORT_SYMBOL(_raw_write_unlock_bh);
  17741. #endif
  17742. +#endif /* !PREEMPT_RT_FULL */
  17743. +
  17744. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  17745. void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
  17746. diff -Nur linux-4.1.39.orig/kernel/locking/spinlock_debug.c linux-4.1.39/kernel/locking/spinlock_debug.c
  17747. --- linux-4.1.39.orig/kernel/locking/spinlock_debug.c 2017-03-13 21:04:36.000000000 +0100
  17748. +++ linux-4.1.39/kernel/locking/spinlock_debug.c 2017-04-18 17:56:30.625397596 +0200
  17749. @@ -31,6 +31,7 @@
  17750. EXPORT_SYMBOL(__raw_spin_lock_init);
  17751. +#ifndef CONFIG_PREEMPT_RT_FULL
  17752. void __rwlock_init(rwlock_t *lock, const char *name,
  17753. struct lock_class_key *key)
  17754. {
  17755. @@ -48,6 +49,7 @@
  17756. }
  17757. EXPORT_SYMBOL(__rwlock_init);
  17758. +#endif
  17759. static void spin_dump(raw_spinlock_t *lock, const char *msg)
  17760. {
  17761. @@ -159,6 +161,7 @@
  17762. arch_spin_unlock(&lock->raw_lock);
  17763. }
  17764. +#ifndef CONFIG_PREEMPT_RT_FULL
  17765. static void rwlock_bug(rwlock_t *lock, const char *msg)
  17766. {
  17767. if (!debug_locks_off())
  17768. @@ -300,3 +303,5 @@
  17769. debug_write_unlock(lock);
  17770. arch_write_unlock(&lock->raw_lock);
  17771. }
  17772. +
  17773. +#endif
  17774. diff -Nur linux-4.1.39.orig/kernel/module.c linux-4.1.39/kernel/module.c
  17775. --- linux-4.1.39.orig/kernel/module.c 2017-03-13 21:04:36.000000000 +0100
  17776. +++ linux-4.1.39/kernel/module.c 2017-04-18 17:56:30.625397596 +0200
  17777. @@ -525,16 +525,7 @@
  17778. memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
  17779. }
  17780. -/**
  17781. - * is_module_percpu_address - test whether address is from module static percpu
  17782. - * @addr: address to test
  17783. - *
  17784. - * Test whether @addr belongs to module static percpu area.
  17785. - *
  17786. - * RETURNS:
  17787. - * %true if @addr is from module static percpu area
  17788. - */
  17789. -bool is_module_percpu_address(unsigned long addr)
  17790. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  17791. {
  17792. struct module *mod;
  17793. unsigned int cpu;
  17794. @@ -548,9 +539,11 @@
  17795. continue;
  17796. for_each_possible_cpu(cpu) {
  17797. void *start = per_cpu_ptr(mod->percpu, cpu);
  17798. + void *va = (void *)addr;
  17799. - if ((void *)addr >= start &&
  17800. - (void *)addr < start + mod->percpu_size) {
  17801. + if (va >= start && va < start + mod->percpu_size) {
  17802. + if (can_addr)
  17803. + *can_addr = (unsigned long) (va - start);
  17804. preempt_enable();
  17805. return true;
  17806. }
  17807. @@ -561,6 +554,20 @@
  17808. return false;
  17809. }
  17810. +/**
  17811. + * is_module_percpu_address - test whether address is from module static percpu
  17812. + * @addr: address to test
  17813. + *
  17814. + * Test whether @addr belongs to module static percpu area.
  17815. + *
  17816. + * RETURNS:
  17817. + * %true if @addr is from module static percpu area
  17818. + */
  17819. +bool is_module_percpu_address(unsigned long addr)
  17820. +{
  17821. + return __is_module_percpu_address(addr, NULL);
  17822. +}
  17823. +
  17824. #else /* ... !CONFIG_SMP */
  17825. static inline void __percpu *mod_percpu(struct module *mod)
  17826. @@ -591,6 +598,11 @@
  17827. {
  17828. return false;
  17829. }
  17830. +
  17831. +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
  17832. +{
  17833. + return false;
  17834. +}
  17835. #endif /* CONFIG_SMP */
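
The module.c change splits is_module_percpu_address() into a __is_module_percpu_address() variant with an optional out-parameter: besides answering "is this address inside a module's static per-CPU area?", it can report the offset of the address within that area for callers that need the canonical per-CPU offset. A small userspace model of that "range check plus optional offset out-parameter" shape follows; the struct and names are illustrative.

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct area {
	char  *start;
	size_t size;
};

static bool __addr_in_area(const struct area *a, unsigned long addr,
			   unsigned long *offset)
{
	char *va = (char *)addr;

	if (va >= a->start && va < a->start + a->size) {
		if (offset)
			*offset = (unsigned long)(va - a->start);
		return true;
	}
	return false;
}

/* The old single-argument form becomes a thin wrapper, as in the patch. */
static bool addr_in_area(const struct area *a, unsigned long addr)
{
	return __addr_in_area(a, addr, NULL);
}

int main(void)
{
	static char buf[64];
	struct area a = { .start = buf, .size = sizeof(buf) };
	unsigned long off;

	if (__addr_in_area(&a, (unsigned long)&buf[10], &off))
		printf("inside, offset %lu\n", off);
	printf("outside: %d\n", addr_in_area(&a, (unsigned long)&off));
	return 0;
}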
  17836. diff -Nur linux-4.1.39.orig/kernel/panic.c linux-4.1.39/kernel/panic.c
  17837. --- linux-4.1.39.orig/kernel/panic.c 2017-03-13 21:04:36.000000000 +0100
  17838. +++ linux-4.1.39/kernel/panic.c 2017-04-18 17:56:30.625397596 +0200
  17839. @@ -399,9 +399,11 @@
  17840. static int init_oops_id(void)
  17841. {
  17842. +#ifndef CONFIG_PREEMPT_RT_FULL
  17843. if (!oops_id)
  17844. get_random_bytes(&oops_id, sizeof(oops_id));
  17845. else
  17846. +#endif
  17847. oops_id++;
  17848. return 0;
  17849. diff -Nur linux-4.1.39.orig/kernel/power/hibernate.c linux-4.1.39/kernel/power/hibernate.c
  17850. --- linux-4.1.39.orig/kernel/power/hibernate.c 2017-03-13 21:04:36.000000000 +0100
  17851. +++ linux-4.1.39/kernel/power/hibernate.c 2017-04-18 17:56:30.625397596 +0200
  17852. @@ -285,6 +285,8 @@
  17853. local_irq_disable();
  17854. + system_state = SYSTEM_SUSPEND;
  17855. +
  17856. error = syscore_suspend();
  17857. if (error) {
  17858. printk(KERN_ERR "PM: Some system devices failed to power down, "
  17859. @@ -314,6 +316,7 @@
  17860. syscore_resume();
  17861. Enable_irqs:
  17862. + system_state = SYSTEM_RUNNING;
  17863. local_irq_enable();
  17864. Enable_cpus:
  17865. @@ -437,6 +440,7 @@
  17866. goto Enable_cpus;
  17867. local_irq_disable();
  17868. + system_state = SYSTEM_SUSPEND;
  17869. error = syscore_suspend();
  17870. if (error)
  17871. @@ -470,6 +474,7 @@
  17872. syscore_resume();
  17873. Enable_irqs:
  17874. + system_state = SYSTEM_RUNNING;
  17875. local_irq_enable();
  17876. Enable_cpus:
  17877. @@ -555,6 +560,7 @@
  17878. goto Platform_finish;
  17879. local_irq_disable();
  17880. + system_state = SYSTEM_SUSPEND;
  17881. syscore_suspend();
  17882. if (pm_wakeup_pending()) {
  17883. error = -EAGAIN;
  17884. @@ -567,6 +573,7 @@
  17885. Power_up:
  17886. syscore_resume();
  17887. + system_state = SYSTEM_RUNNING;
  17888. local_irq_enable();
  17889. enable_nonboot_cpus();
  17890. diff -Nur linux-4.1.39.orig/kernel/power/suspend.c linux-4.1.39/kernel/power/suspend.c
  17891. --- linux-4.1.39.orig/kernel/power/suspend.c 2017-03-13 21:04:36.000000000 +0100
  17892. +++ linux-4.1.39/kernel/power/suspend.c 2017-04-18 17:56:30.625397596 +0200
  17893. @@ -356,6 +356,8 @@
  17894. arch_suspend_disable_irqs();
  17895. BUG_ON(!irqs_disabled());
  17896. + system_state = SYSTEM_SUSPEND;
  17897. +
  17898. error = syscore_suspend();
  17899. if (!error) {
  17900. *wakeup = pm_wakeup_pending();
  17901. @@ -370,6 +372,8 @@
  17902. syscore_resume();
  17903. }
  17904. + system_state = SYSTEM_RUNNING;
  17905. +
  17906. arch_suspend_enable_irqs();
  17907. BUG_ON(irqs_disabled());
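
Both the hibernate.c and suspend.c hunks above bracket syscore_suspend()/syscore_resume() with system_state = SYSTEM_SUSPEND and SYSTEM_RUNNING, so code that must behave differently while the machine is mid-suspend can test one global instead of threading a flag through every caller. The sketch below models only the bracketing pattern, with the state restored on every exit path; the state names and device call are invented for illustration.

#include <stdio.h>

/* Illustrative system states; the kernel's enum has more members. */
enum sys_state { STATE_RUNNING, STATE_SUSPEND };

static enum sys_state system_state = STATE_RUNNING;

static int do_suspend_devices(void)
{
	/* Pretend one device refuses to suspend. */
	return -1;
}

static int suspend_enter(void)
{
	int error;

	/* interrupts would already be disabled here in the kernel */
	system_state = STATE_SUSPEND;

	error = do_suspend_devices();
	if (error)
		goto resume;

	/* ... enter the low-power state ... */

resume:
	/* Always restore the state on every exit path. */
	system_state = STATE_RUNNING;
	/* interrupts would be re-enabled here */
	return error;
}

int main(void)
{
	int err = suspend_enter();

	printf("suspend_enter() = %d, state = %d\n", err, system_state);
	return 0;
}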
  17908. diff -Nur linux-4.1.39.orig/kernel/printk/printk.c linux-4.1.39/kernel/printk/printk.c
  17909. --- linux-4.1.39.orig/kernel/printk/printk.c 2017-03-13 21:04:36.000000000 +0100
  17910. +++ linux-4.1.39/kernel/printk/printk.c 2017-04-18 17:56:30.625397596 +0200
  17911. @@ -1163,6 +1163,7 @@
  17912. {
  17913. char *text;
  17914. int len = 0;
  17915. + int attempts = 0;
  17916. text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
  17917. if (!text)
  17918. @@ -1174,7 +1175,14 @@
  17919. u64 seq;
  17920. u32 idx;
  17921. enum log_flags prev;
  17922. -
  17923. + int num_msg;
  17924. +try_again:
  17925. + attempts++;
  17926. + if (attempts > 10) {
  17927. + len = -EBUSY;
  17928. + goto out;
  17929. + }
  17930. + num_msg = 0;
  17931. if (clear_seq < log_first_seq) {
  17932. /* messages are gone, move to first available one */
  17933. clear_seq = log_first_seq;
  17934. @@ -1195,6 +1203,14 @@
  17935. prev = msg->flags;
  17936. idx = log_next(idx);
  17937. seq++;
  17938. + num_msg++;
  17939. + if (num_msg > 5) {
  17940. + num_msg = 0;
  17941. + raw_spin_unlock_irq(&logbuf_lock);
  17942. + raw_spin_lock_irq(&logbuf_lock);
  17943. + if (clear_seq < log_first_seq)
  17944. + goto try_again;
  17945. + }
  17946. }
  17947. /* move first record forward until length fits into the buffer */
  17948. @@ -1208,6 +1224,14 @@
  17949. prev = msg->flags;
  17950. idx = log_next(idx);
  17951. seq++;
  17952. + num_msg++;
  17953. + if (num_msg > 5) {
  17954. + num_msg = 0;
  17955. + raw_spin_unlock_irq(&logbuf_lock);
  17956. + raw_spin_lock_irq(&logbuf_lock);
  17957. + if (clear_seq < log_first_seq)
  17958. + goto try_again;
  17959. + }
  17960. }
  17961. /* last message fitting into this dump */
  17962. @@ -1248,6 +1272,7 @@
  17963. clear_seq = log_next_seq;
  17964. clear_idx = log_next_idx;
  17965. }
  17966. +out:
  17967. raw_spin_unlock_irq(&logbuf_lock);
  17968. kfree(text);
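
The syslog-clearing hunk above bounds how long logbuf_lock is held while draining the log: every five records the lock is dropped and re-taken, the code re-checks whether the reader's start position was overwritten in the meantime (clear_seq < log_first_seq) and restarts via try_again, and after ten restarts it gives up with -EBUSY instead of spinning forever. Below is a userspace pthread sketch of that "bounded hold, revalidate, retry budget" pattern; it is single-threaded, so the restart path never actually fires, and all names are illustrative.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long first_seq;		/* oldest record still in the buffer */
static unsigned long next_seq = 100;	/* one past the newest record */

static long walk_records(void)
{
	unsigned long pos;
	int attempts = 0, in_a_row;
	long copied;

	pthread_mutex_lock(&lock);
restart:
	if (++attempts > 10) {
		copied = -1;			/* like -EBUSY above */
		goto out;
	}
	copied = 0;
	in_a_row = 0;
	pos = first_seq;

	while (pos < next_seq) {
		copied++;			/* "copy" one record */
		pos++;
		if (++in_a_row > 5) {
			in_a_row = 0;
			pthread_mutex_unlock(&lock);	/* lock break */
			pthread_mutex_lock(&lock);
			if (pos < first_seq)		/* lost our place? */
				goto restart;
		}
	}
out:
	pthread_mutex_unlock(&lock);
	return copied;
}

int main(void)
{
	printf("copied %ld records\n", walk_records());
	return 0;
}

The retry budget is what keeps a very chatty producer from starving the reader indefinitely once the lock breaks are introduced.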
  17969. @@ -1401,6 +1426,12 @@
  17970. if (!console_drivers)
  17971. return;
  17972. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  17973. + if (in_irq() || in_nmi())
  17974. + return;
  17975. + }
  17976. +
  17977. + migrate_disable();
  17978. for_each_console(con) {
  17979. if (exclusive_console && con != exclusive_console)
  17980. continue;
  17981. @@ -1413,6 +1444,7 @@
  17982. continue;
  17983. con->write(con, text, len);
  17984. }
  17985. + migrate_enable();
  17986. }
  17987. /*
  17988. @@ -1473,6 +1505,15 @@
  17989. static int console_trylock_for_printk(void)
  17990. {
  17991. unsigned int cpu = smp_processor_id();
  17992. +#ifdef CONFIG_PREEMPT_RT_FULL
  17993. + int lock = !early_boot_irqs_disabled && (preempt_count() == 0) &&
  17994. + !irqs_disabled();
  17995. +#else
  17996. + int lock = 1;
  17997. +#endif
  17998. +
  17999. + if (!lock)
  18000. + return 0;
  18001. if (!console_trylock())
  18002. return 0;
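
The console_trylock_for_printk() hunk above makes RT refuse to even attempt the console lock when the caller is not in a sleepable context (early boot with irqs off, preemption disabled, or irqs disabled), because on PREEMPT_RT the console path can sleep. A loose userspace sketch of the "check the context first, then trylock" gate; the boolean flag stands in for the preempt_count()/irqs_disabled() checks and is purely illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;

static bool try_console_lock(bool atomic_context)
{
	if (atomic_context)
		return false;			/* don't even try */
	return pthread_mutex_trylock(&console_lock) == 0;
}

int main(void)
{
	printf("atomic ctx: %d\n", try_console_lock(true));
	printf("normal ctx: %d\n", try_console_lock(false));
	if (!try_console_lock(false))		/* lock already held now */
		printf("second trylock fails as expected\n");
	pthread_mutex_unlock(&console_lock);
	return 0;
}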
  18003. @@ -1607,6 +1648,62 @@
  18004. return textlen;
  18005. }
  18006. +#ifdef CONFIG_EARLY_PRINTK
  18007. +struct console *early_console;
  18008. +
  18009. +static void early_vprintk(const char *fmt, va_list ap)
  18010. +{
  18011. + if (early_console) {
  18012. + char buf[512];
  18013. + int n = vscnprintf(buf, sizeof(buf), fmt, ap);
  18014. +
  18015. + early_console->write(early_console, buf, n);
  18016. + }
  18017. +}
  18018. +
  18019. +asmlinkage void early_printk(const char *fmt, ...)
  18020. +{
  18021. + va_list ap;
  18022. +
  18023. + va_start(ap, fmt);
  18024. + early_vprintk(fmt, ap);
  18025. + va_end(ap);
  18026. +}
  18027. +
  18028. +/*
  18029. + * This is independent of any log levels - a global
  18030. + * kill switch that turns off all of printk.
  18031. + *
  18032. + * Used by the NMI watchdog if early-printk is enabled.
  18033. + */
  18034. +static bool __read_mostly printk_killswitch;
  18035. +
  18036. +static int __init force_early_printk_setup(char *str)
  18037. +{
  18038. + printk_killswitch = true;
  18039. + return 0;
  18040. +}
  18041. +early_param("force_early_printk", force_early_printk_setup);
  18042. +
  18043. +void printk_kill(void)
  18044. +{
  18045. + printk_killswitch = true;
  18046. +}
  18047. +
  18048. +static int forced_early_printk(const char *fmt, va_list ap)
  18049. +{
  18050. + if (!printk_killswitch)
  18051. + return 0;
  18052. + early_vprintk(fmt, ap);
  18053. + return 1;
  18054. +}
  18055. +#else
  18056. +static inline int forced_early_printk(const char *fmt, va_list ap)
  18057. +{
  18058. + return 0;
  18059. +}
  18060. +#endif
  18061. +
  18062. asmlinkage int vprintk_emit(int facility, int level,
  18063. const char *dict, size_t dictlen,
  18064. const char *fmt, va_list args)
  18065. @@ -1623,6 +1720,13 @@
  18066. /* cpu currently holding logbuf_lock in this function */
  18067. static unsigned int logbuf_cpu = UINT_MAX;
  18068. + /*
  18069. + * Fall back to early_printk if a debugging subsystem has
  18070. + * killed printk output
  18071. + */
  18072. + if (unlikely(forced_early_printk(fmt, args)))
  18073. + return 1;
  18074. +
  18075. if (level == LOGLEVEL_SCHED) {
  18076. level = LOGLEVEL_DEFAULT;
  18077. in_sched = true;
  18078. @@ -1764,8 +1868,7 @@
  18079. * console_sem which would prevent anyone from printing to
  18080. * console
  18081. */
  18082. - preempt_disable();
  18083. -
  18084. + migrate_disable();
  18085. /*
  18086. * Try to acquire and then immediately release the console
  18087. * semaphore. The release will print out buffers and wake up
  18088. @@ -1773,7 +1876,7 @@
  18089. */
  18090. if (console_trylock_for_printk())
  18091. console_unlock();
  18092. - preempt_enable();
  18093. + migrate_enable();
  18094. lockdep_on();
  18095. }
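
The vprintk_emit() hunk above replaces preempt_disable()/preempt_enable() around the console-trylock attempt with migrate_disable()/migrate_enable(): the task is kept on its current CPU, but on RT it stays preemptible and may sleep in the sleeping-spinlock based console code. There is no exact userspace equivalent; the closest illustration of the semantics is pinning a thread to the CPU it is already on for a region while remaining fully preemptible. This is only an analogy, and the kernel primitive is far cheaper than changing the affinity mask.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t saved, pinned;
	int cpu;

	if (sched_getaffinity(0, sizeof(saved), &saved))
		return 1;

	cpu = sched_getcpu();
	CPU_ZERO(&pinned);
	CPU_SET(cpu, &pinned);
	if (sched_setaffinity(0, sizeof(pinned), &pinned))
		return 1;

	/* "migration disabled": still preemptible, may block or sleep. */
	printf("working pinned to CPU %d\n", cpu);
	usleep(1000);

	/* "migration enabled" again. */
	sched_setaffinity(0, sizeof(saved), &saved);
	return 0;
}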
  18096. @@ -1902,26 +2005,6 @@
  18097. #endif /* CONFIG_PRINTK */
  18098. -#ifdef CONFIG_EARLY_PRINTK
  18099. -struct console *early_console;
  18100. -
  18101. -asmlinkage __visible void early_printk(const char *fmt, ...)
  18102. -{
  18103. - va_list ap;
  18104. - char buf[512];
  18105. - int n;
  18106. -
  18107. - if (!early_console)
  18108. - return;
  18109. -
  18110. - va_start(ap, fmt);
  18111. - n = vscnprintf(buf, sizeof(buf), fmt, ap);
  18112. - va_end(ap);
  18113. -
  18114. - early_console->write(early_console, buf, n);
  18115. -}
  18116. -#endif
  18117. -
  18118. static int __add_preferred_console(char *name, int idx, char *options,
  18119. char *brl_options)
  18120. {
  18121. @@ -2143,11 +2226,16 @@
  18122. goto out;
  18123. len = cont_print_text(text, size);
  18124. +#ifndef CONFIG_PREEMPT_RT_FULL
  18125. raw_spin_unlock(&logbuf_lock);
  18126. stop_critical_timings();
  18127. call_console_drivers(cont.level, text, len);
  18128. start_critical_timings();
  18129. local_irq_restore(flags);
  18130. +#else
  18131. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  18132. + call_console_drivers(cont.level, text, len);
  18133. +#endif
  18134. return;
  18135. out:
  18136. raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  18137. @@ -2246,12 +2334,17 @@
  18138. console_idx = log_next(console_idx);
  18139. console_seq++;
  18140. console_prev = msg->flags;
  18141. +#ifdef CONFIG_PREEMPT_RT_FULL
  18142. + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
  18143. + call_console_drivers(level, text, len);
  18144. +#else
  18145. raw_spin_unlock(&logbuf_lock);
  18146. stop_critical_timings(); /* don't trace print latency */
  18147. call_console_drivers(level, text, len);
  18148. start_critical_timings();
  18149. local_irq_restore(flags);
  18150. +#endif
  18151. if (do_cond_resched)
  18152. cond_resched();
  18153. @@ -2304,6 +2397,11 @@
  18154. {
  18155. struct console *c;
  18156. + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) {
  18157. + if (in_irq() || in_nmi())
  18158. + return;
  18159. + }
  18160. +
  18161. /*
  18162. * console_unblank can no longer be called in interrupt context unless
  18163. * oops_in_progress is set to 1..
  18164. diff -Nur linux-4.1.39.orig/kernel/ptrace.c linux-4.1.39/kernel/ptrace.c
  18165. --- linux-4.1.39.orig/kernel/ptrace.c 2017-03-13 21:04:36.000000000 +0100
  18166. +++ linux-4.1.39/kernel/ptrace.c 2017-04-18 17:56:30.625397596 +0200
  18167. @@ -137,7 +137,14 @@
  18168. spin_lock_irq(&task->sighand->siglock);
  18169. if (task_is_traced(task) && !__fatal_signal_pending(task)) {
  18170. - task->state = __TASK_TRACED;
  18171. + unsigned long flags;
  18172. +
  18173. + raw_spin_lock_irqsave(&task->pi_lock, flags);
  18174. + if (task->state & __TASK_TRACED)
  18175. + task->state = __TASK_TRACED;
  18176. + else
  18177. + task->saved_state = __TASK_TRACED;
  18178. + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  18179. ret = true;
  18180. }
  18181. spin_unlock_irq(&task->sighand->siglock);
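
The ptrace hunk above has to cope with RT's two-level task state: if the traced task is momentarily blocked on a sleeping spinlock, its real state is parked in task->saved_state (updated under pi_lock), so freezing the tracee must write __TASK_TRACED into whichever slot currently holds the state. The toy model below shows only that two-slot update; the state bits and names are invented for illustration.

#include <stdio.h>

#define ST_TRACED    0x1
#define ST_LOCKWAIT  0x2	/* stand-in for "blocked on a sleeping spinlock" */

struct task {
	unsigned int state;		/* what the scheduler currently sees */
	unsigned int saved_state;	/* real state parked while in ST_LOCKWAIT */
};

/* Update the live state if it is TRACED, otherwise the parked copy. */
static void freeze_traced(struct task *t)
{
	if (t->state & ST_TRACED)
		t->state = ST_TRACED;
	else
		t->saved_state = ST_TRACED;
}

int main(void)
{
	struct task plain = { .state = ST_TRACED };
	struct task on_lock = { .state = ST_LOCKWAIT, .saved_state = ST_TRACED };

	freeze_traced(&plain);
	freeze_traced(&on_lock);
	printf("plain:   state=%#x saved=%#x\n", plain.state, plain.saved_state);
	printf("on_lock: state=%#x saved=%#x\n", on_lock.state, on_lock.saved_state);
	return 0;
}

Touching the live state of a task that is transiently sleeping on a lock would confuse the lock's wakeup path, which is exactly what the saved_state split avoids.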
  18182. diff -Nur linux-4.1.39.orig/kernel/rcu/rcutorture.c linux-4.1.39/kernel/rcu/rcutorture.c
  18183. --- linux-4.1.39.orig/kernel/rcu/rcutorture.c 2017-03-13 21:04:36.000000000 +0100
  18184. +++ linux-4.1.39/kernel/rcu/rcutorture.c 2017-04-18 17:56:30.625397596 +0200
  18185. @@ -389,6 +389,7 @@
  18186. .name = "rcu"
  18187. };
  18188. +#ifndef CONFIG_PREEMPT_RT_FULL
  18189. /*
  18190. * Definitions for rcu_bh torture testing.
  18191. */
  18192. @@ -428,6 +429,12 @@
  18193. .name = "rcu_bh"
  18194. };
  18195. +#else
  18196. +static struct rcu_torture_ops rcu_bh_ops = {
  18197. + .ttype = INVALID_RCU_FLAVOR,
  18198. +};
  18199. +#endif
  18200. +
  18201. /*
  18202. * Don't even think about trying any of these in real life!!!
  18203. * The names includes "busted", and they really means it!
  18204. diff -Nur linux-4.1.39.orig/kernel/rcu/tree.c linux-4.1.39/kernel/rcu/tree.c
  18205. --- linux-4.1.39.orig/kernel/rcu/tree.c 2017-03-13 21:04:36.000000000 +0100
  18206. +++ linux-4.1.39/kernel/rcu/tree.c 2017-04-18 17:56:30.625397596 +0200
  18207. @@ -56,6 +56,11 @@
  18208. #include <linux/random.h>
  18209. #include <linux/ftrace_event.h>
  18210. #include <linux/suspend.h>
  18211. +#include <linux/delay.h>
  18212. +#include <linux/gfp.h>
  18213. +#include <linux/oom.h>
  18214. +#include <linux/smpboot.h>
  18215. +#include "../time/tick-internal.h"
  18216. #include "tree.h"
  18217. #include "rcu.h"
  18218. @@ -220,6 +225,19 @@
  18219. }
  18220. }
  18221. +#ifdef CONFIG_PREEMPT_RT_FULL
  18222. +static void rcu_preempt_qs(void);
  18223. +
  18224. +void rcu_bh_qs(void)
  18225. +{
  18226. + unsigned long flags;
  18227. +
  18228. + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */
  18229. + local_irq_save(flags);
  18230. + rcu_preempt_qs();
  18231. + local_irq_restore(flags);
  18232. +}
  18233. +#else
  18234. void rcu_bh_qs(void)
  18235. {
  18236. if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
  18237. @@ -229,6 +247,7 @@
  18238. __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
  18239. }
  18240. }
  18241. +#endif
  18242. static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
  18243. @@ -404,6 +423,7 @@
  18244. }
  18245. EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
  18246. +#ifndef CONFIG_PREEMPT_RT_FULL
  18247. /*
  18248. * Return the number of RCU BH batches completed thus far for debug & stats.
  18249. */
  18250. @@ -431,6 +451,13 @@
  18251. }
  18252. EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  18253. +#else
  18254. +void rcu_force_quiescent_state(void)
  18255. +{
  18256. +}
  18257. +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  18258. +#endif
  18259. +
  18260. /*
  18261. * Force a quiescent state for RCU-sched.
  18262. */
  18263. @@ -1545,7 +1572,7 @@
  18264. !ACCESS_ONCE(rsp->gp_flags) ||
  18265. !rsp->gp_kthread)
  18266. return;
  18267. - wake_up(&rsp->gp_wq);
  18268. + swait_wake(&rsp->gp_wq);
  18269. }
  18270. /*
  18271. @@ -1986,7 +2013,7 @@
  18272. ACCESS_ONCE(rsp->gpnum),
  18273. TPS("reqwait"));
  18274. rsp->gp_state = RCU_GP_WAIT_GPS;
  18275. - wait_event_interruptible(rsp->gp_wq,
  18276. + swait_event_interruptible(rsp->gp_wq,
  18277. ACCESS_ONCE(rsp->gp_flags) &
  18278. RCU_GP_FLAG_INIT);
  18279. /* Locking provides needed memory barrier. */
  18280. @@ -2015,7 +2042,7 @@
  18281. ACCESS_ONCE(rsp->gpnum),
  18282. TPS("fqswait"));
  18283. rsp->gp_state = RCU_GP_WAIT_FQS;
  18284. - ret = wait_event_interruptible_timeout(rsp->gp_wq,
  18285. + ret = swait_event_interruptible_timeout(rsp->gp_wq,
  18286. ((gf = ACCESS_ONCE(rsp->gp_flags)) &
  18287. RCU_GP_FLAG_FQS) ||
  18288. (!ACCESS_ONCE(rnp->qsmask) &&
  18289. @@ -2860,18 +2887,17 @@
  18290. /*
  18291. * Do RCU core processing for the current CPU.
  18292. */
  18293. -static void rcu_process_callbacks(struct softirq_action *unused)
  18294. +static void rcu_process_callbacks(void)
  18295. {
  18296. struct rcu_state *rsp;
  18297. if (cpu_is_offline(smp_processor_id()))
  18298. return;
  18299. - trace_rcu_utilization(TPS("Start RCU core"));
  18300. for_each_rcu_flavor(rsp)
  18301. __rcu_process_callbacks(rsp);
  18302. - trace_rcu_utilization(TPS("End RCU core"));
  18303. }
  18304. +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  18305. /*
  18306. * Schedule RCU callback invocation. If the specified type of RCU
  18307. * does not support RCU priority boosting, just do a direct call,
  18308. @@ -2883,18 +2909,105 @@
  18309. {
  18310. if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
  18311. return;
  18312. - if (likely(!rsp->boost)) {
  18313. - rcu_do_batch(rsp, rdp);
  18314. + rcu_do_batch(rsp, rdp);
  18315. +}
  18316. +
  18317. +static void rcu_wake_cond(struct task_struct *t, int status)
  18318. +{
  18319. + /*
  18320. + * If the thread is yielding, only wake it when this
  18321. + * is invoked from idle
  18322. + */
  18323. + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
  18324. + wake_up_process(t);
  18325. +}
  18326. +
  18327. +/*
  18328. + * Wake up this CPU's rcuc kthread to do RCU core processing.
  18329. + */
  18330. +static void invoke_rcu_core(void)
  18331. +{
  18332. + unsigned long flags;
  18333. + struct task_struct *t;
  18334. +
  18335. + if (!cpu_online(smp_processor_id()))
  18336. return;
  18337. + local_irq_save(flags);
  18338. + __this_cpu_write(rcu_cpu_has_work, 1);
  18339. + t = __this_cpu_read(rcu_cpu_kthread_task);
  18340. + if (t != NULL && current != t)
  18341. + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
  18342. + local_irq_restore(flags);
  18343. +}
  18344. +
  18345. +static void rcu_cpu_kthread_park(unsigned int cpu)
  18346. +{
  18347. + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  18348. +}
  18349. +
  18350. +static int rcu_cpu_kthread_should_run(unsigned int cpu)
  18351. +{
  18352. + return __this_cpu_read(rcu_cpu_has_work);
  18353. +}
  18354. +
  18355. +/*
  18356. + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  18357. + * RCU softirq used in flavors and configurations of RCU that do not
  18358. + * support RCU priority boosting.
  18359. + */
  18360. +static void rcu_cpu_kthread(unsigned int cpu)
  18361. +{
  18362. + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  18363. + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  18364. + int spincnt;
  18365. +
  18366. + for (spincnt = 0; spincnt < 10; spincnt++) {
  18367. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  18368. + local_bh_disable();
  18369. + *statusp = RCU_KTHREAD_RUNNING;
  18370. + this_cpu_inc(rcu_cpu_kthread_loops);
  18371. + local_irq_disable();
  18372. + work = *workp;
  18373. + *workp = 0;
  18374. + local_irq_enable();
  18375. + if (work)
  18376. + rcu_process_callbacks();
  18377. + local_bh_enable();
  18378. + if (*workp == 0) {
  18379. + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  18380. + *statusp = RCU_KTHREAD_WAITING;
  18381. + return;
  18382. + }
  18383. }
  18384. - invoke_rcu_callbacks_kthread();
  18385. + *statusp = RCU_KTHREAD_YIELDING;
  18386. + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  18387. + schedule_timeout_interruptible(2);
  18388. + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  18389. + *statusp = RCU_KTHREAD_WAITING;
  18390. }
  18391. -static void invoke_rcu_core(void)
  18392. +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  18393. + .store = &rcu_cpu_kthread_task,
  18394. + .thread_should_run = rcu_cpu_kthread_should_run,
  18395. + .thread_fn = rcu_cpu_kthread,
  18396. + .thread_comm = "rcuc/%u",
  18397. + .setup = rcu_cpu_kthread_setup,
  18398. + .park = rcu_cpu_kthread_park,
  18399. +};
  18400. +
  18401. +/*
  18402. + * Spawn per-CPU RCU core processing kthreads.
  18403. + */
  18404. +static int __init rcu_spawn_core_kthreads(void)
  18405. {
  18406. - if (cpu_online(smp_processor_id()))
  18407. - raise_softirq(RCU_SOFTIRQ);
  18408. + int cpu;
  18409. +
  18410. + for_each_possible_cpu(cpu)
  18411. + per_cpu(rcu_cpu_has_work, cpu) = 0;
  18412. + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  18413. + return 0;
  18414. }
  18415. +early_initcall(rcu_spawn_core_kthreads);
  18416. /*
  18417. * Handle any core-RCU processing required by a call_rcu() invocation.
  18418. @@ -3040,6 +3153,7 @@
  18419. }
  18420. EXPORT_SYMBOL_GPL(call_rcu_sched);
  18421. +#ifndef CONFIG_PREEMPT_RT_FULL
  18422. /*
  18423. * Queue an RCU callback for invocation after a quicker grace period.
  18424. */
  18425. @@ -3048,6 +3162,7 @@
  18426. __call_rcu(head, func, &rcu_bh_state, -1, 0);
  18427. }
  18428. EXPORT_SYMBOL_GPL(call_rcu_bh);
  18429. +#endif
  18430. /*
  18431. * Queue an RCU callback for lazy invocation after a grace period.
  18432. @@ -3139,6 +3254,7 @@
  18433. }
  18434. EXPORT_SYMBOL_GPL(synchronize_sched);
  18435. +#ifndef CONFIG_PREEMPT_RT_FULL
  18436. /**
  18437. * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
  18438. *
  18439. @@ -3165,6 +3281,7 @@
  18440. wait_rcu_gp(call_rcu_bh);
  18441. }
  18442. EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
  18443. +#endif
  18444. /**
  18445. * get_state_synchronize_rcu - Snapshot current RCU state
  18446. @@ -3677,6 +3794,7 @@
  18447. mutex_unlock(&rsp->barrier_mutex);
  18448. }
  18449. +#ifndef CONFIG_PREEMPT_RT_FULL
  18450. /**
  18451. * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
  18452. */
  18453. @@ -3685,6 +3803,7 @@
  18454. _rcu_barrier(&rcu_bh_state);
  18455. }
  18456. EXPORT_SYMBOL_GPL(rcu_barrier_bh);
  18457. +#endif
  18458. /**
  18459. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
  18460. @@ -4021,7 +4140,7 @@
  18461. }
  18462. }
  18463. - init_waitqueue_head(&rsp->gp_wq);
  18464. + init_swait_head(&rsp->gp_wq);
  18465. rnp = rsp->level[rcu_num_lvls - 1];
  18466. for_each_possible_cpu(i) {
  18467. while (i > rnp->grphi)
  18468. @@ -4120,7 +4239,6 @@
  18469. rcu_init_one(&rcu_bh_state, &rcu_bh_data);
  18470. rcu_init_one(&rcu_sched_state, &rcu_sched_data);
  18471. __rcu_init_preempt();
  18472. - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  18473. /*
  18474. * We don't need protection against CPU-hotplug here because
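
The tree.c changes above stop raising RCU_SOFTIRQ and instead run core RCU processing in per-CPU "rcuc/%u" kthreads registered through smp_hotplug_thread: invoke_rcu_core() only sets a per-CPU work flag and wakes the thread, and the thread bounds itself to ten busy rounds before declaring itself yielding and backing off briefly. The pthread sketch below models that consume-the-flag loop with a single worker and a condvar; the kernel obviously uses its own kthread and per-CPU machinery, so treat every name here as illustrative.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int has_work;
static int stop;

/* Model of invoke_rcu_core(): mark work pending and wake the worker. */
static void kick_worker(void)
{
	pthread_mutex_lock(&lock);
	has_work = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
}

/* Model of rcu_cpu_kthread(): grab-and-clear the flag, process, back off. */
static void *worker(void *arg)
{
	(void)arg;
	for (;;) {
		int work, spins;

		pthread_mutex_lock(&lock);
		while (!has_work && !stop)
			pthread_cond_wait(&cond, &lock);
		if (stop) {
			pthread_mutex_unlock(&lock);
			return NULL;
		}
		pthread_mutex_unlock(&lock);

		for (spins = 0; spins < 10; spins++) {
			pthread_mutex_lock(&lock);
			work = has_work;
			has_work = 0;
			pthread_mutex_unlock(&lock);

			if (!work)
				break;		/* nothing new arrived: wait again */
			printf("processing callbacks (round %d)\n", spins);
		}
		if (spins == 10)
			usleep(2000);		/* "yielding": brief back-off */
	}
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	kick_worker();
	usleep(10000);

	pthread_mutex_lock(&lock);
	stop = 1;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}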
  18475. diff -Nur linux-4.1.39.orig/kernel/rcu/tree.h linux-4.1.39/kernel/rcu/tree.h
  18476. --- linux-4.1.39.orig/kernel/rcu/tree.h 2017-03-13 21:04:36.000000000 +0100
  18477. +++ linux-4.1.39/kernel/rcu/tree.h 2017-04-18 17:56:30.629397751 +0200
  18478. @@ -27,6 +27,7 @@
  18479. #include <linux/threads.h>
  18480. #include <linux/cpumask.h>
  18481. #include <linux/seqlock.h>
  18482. +#include <linux/wait-simple.h>
  18483. /*
  18484. * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
  18485. @@ -210,7 +211,7 @@
  18486. /* This can happen due to race conditions. */
  18487. #endif /* #ifdef CONFIG_RCU_BOOST */
  18488. #ifdef CONFIG_RCU_NOCB_CPU
  18489. - wait_queue_head_t nocb_gp_wq[2];
  18490. + struct swait_head nocb_gp_wq[2];
  18491. /* Place for rcu_nocb_kthread() to wait GP. */
  18492. #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  18493. int need_future_gp[2];
  18494. @@ -349,7 +350,7 @@
  18495. atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
  18496. struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
  18497. struct rcu_head **nocb_follower_tail;
  18498. - wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
  18499. + struct swait_head nocb_wq; /* For nocb kthreads to sleep on. */
  18500. struct task_struct *nocb_kthread;
  18501. int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
  18502. @@ -438,7 +439,7 @@
  18503. unsigned long gpnum; /* Current gp number. */
  18504. unsigned long completed; /* # of last completed gp. */
  18505. struct task_struct *gp_kthread; /* Task for grace periods. */
  18506. - wait_queue_head_t gp_wq; /* Where GP task waits. */
  18507. + struct swait_head gp_wq; /* Where GP task waits. */
  18508. short gp_flags; /* Commands for GP task. */
  18509. short gp_state; /* GP kthread sleep state. */
  18510. @@ -529,12 +530,10 @@
  18511. DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
  18512. #endif /* #ifdef CONFIG_PREEMPT_RCU */
  18513. -#ifdef CONFIG_RCU_BOOST
  18514. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  18515. DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
  18516. DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  18517. DECLARE_PER_CPU(char, rcu_cpu_has_work);
  18518. -#endif /* #ifdef CONFIG_RCU_BOOST */
  18519. #ifndef RCU_TREE_NONCORE
  18520. @@ -553,10 +552,9 @@
  18521. static void __init __rcu_init_preempt(void);
  18522. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
  18523. static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
  18524. -static void invoke_rcu_callbacks_kthread(void);
  18525. static bool rcu_is_callbacks_kthread(void);
  18526. +static void rcu_cpu_kthread_setup(unsigned int cpu);
  18527. #ifdef CONFIG_RCU_BOOST
  18528. -static void rcu_preempt_do_callbacks(void);
  18529. static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  18530. struct rcu_node *rnp);
  18531. #endif /* #ifdef CONFIG_RCU_BOOST */
  18532. diff -Nur linux-4.1.39.orig/kernel/rcu/tree_plugin.h linux-4.1.39/kernel/rcu/tree_plugin.h
  18533. --- linux-4.1.39.orig/kernel/rcu/tree_plugin.h 2017-03-13 21:04:36.000000000 +0100
  18534. +++ linux-4.1.39/kernel/rcu/tree_plugin.h 2017-04-18 17:56:30.629397751 +0200
  18535. @@ -24,27 +24,20 @@
  18536. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  18537. */
  18538. -#include <linux/delay.h>
  18539. -#include <linux/gfp.h>
  18540. -#include <linux/oom.h>
  18541. -#include <linux/smpboot.h>
  18542. -#include "../time/tick-internal.h"
  18543. -
  18544. #ifdef CONFIG_RCU_BOOST
  18545. #include "../locking/rtmutex_common.h"
  18546. +#endif /* #ifdef CONFIG_RCU_BOOST */
  18547. +
  18548. /*
  18549. * Control variables for per-CPU and per-rcu_node kthreads. These
  18550. * handle all flavors of RCU.
  18551. */
  18552. -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
  18553. DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
  18554. DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
  18555. DEFINE_PER_CPU(char, rcu_cpu_has_work);
  18556. -#endif /* #ifdef CONFIG_RCU_BOOST */
  18557. -
  18558. #ifdef CONFIG_RCU_NOCB_CPU
  18559. static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
  18560. static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
  18561. @@ -291,7 +284,7 @@
  18562. }
  18563. /* Hardware IRQ handlers cannot block, complain if they get here. */
  18564. - if (in_irq() || in_serving_softirq()) {
  18565. + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) {
  18566. lockdep_rcu_suspicious(__FILE__, __LINE__,
  18567. "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
  18568. pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n",
  18569. @@ -496,15 +489,6 @@
  18570. t->rcu_read_unlock_special.b.need_qs = true;
  18571. }
  18572. -#ifdef CONFIG_RCU_BOOST
  18573. -
  18574. -static void rcu_preempt_do_callbacks(void)
  18575. -{
  18576. - rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
  18577. -}
  18578. -
  18579. -#endif /* #ifdef CONFIG_RCU_BOOST */
  18580. -
  18581. /*
  18582. * Queue a preemptible-RCU callback for invocation after a grace period.
  18583. */
  18584. @@ -939,6 +923,19 @@
  18585. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
  18586. +/*
  18587. + * If boosting, set rcuc kthreads to realtime priority.
  18588. + */
  18589. +static void rcu_cpu_kthread_setup(unsigned int cpu)
  18590. +{
  18591. +#ifdef CONFIG_RCU_BOOST
  18592. + struct sched_param sp;
  18593. +
  18594. + sp.sched_priority = kthread_prio;
  18595. + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  18596. +#endif /* #ifdef CONFIG_RCU_BOOST */
  18597. +}
  18598. +
  18599. #ifdef CONFIG_RCU_BOOST
  18600. #include "../locking/rtmutex_common.h"
  18601. @@ -970,16 +967,6 @@
  18602. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  18603. -static void rcu_wake_cond(struct task_struct *t, int status)
  18604. -{
  18605. - /*
  18606. - * If the thread is yielding, only wake it when this
  18607. - * is invoked from idle
  18608. - */
  18609. - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  18610. - wake_up_process(t);
  18611. -}
  18612. -
  18613. /*
  18614. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  18615. * or ->boost_tasks, advancing the pointer to the next task in the
  18616. @@ -1125,23 +1112,6 @@
  18617. }
  18618. /*
  18619. - * Wake up the per-CPU kthread to invoke RCU callbacks.
  18620. - */
  18621. -static void invoke_rcu_callbacks_kthread(void)
  18622. -{
  18623. - unsigned long flags;
  18624. -
  18625. - local_irq_save(flags);
  18626. - __this_cpu_write(rcu_cpu_has_work, 1);
  18627. - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  18628. - current != __this_cpu_read(rcu_cpu_kthread_task)) {
  18629. - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  18630. - __this_cpu_read(rcu_cpu_kthread_status));
  18631. - }
  18632. - local_irq_restore(flags);
  18633. -}
  18634. -
  18635. -/*
  18636. * Is the current CPU running the RCU-callbacks kthread?
  18637. * Caller must have preemption disabled.
  18638. */
  18639. @@ -1196,67 +1166,6 @@
  18640. return 0;
  18641. }
  18642. -static void rcu_kthread_do_work(void)
  18643. -{
  18644. - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
  18645. - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
  18646. - rcu_preempt_do_callbacks();
  18647. -}
  18648. -
  18649. -static void rcu_cpu_kthread_setup(unsigned int cpu)
  18650. -{
  18651. - struct sched_param sp;
  18652. -
  18653. - sp.sched_priority = kthread_prio;
  18654. - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  18655. -}
  18656. -
  18657. -static void rcu_cpu_kthread_park(unsigned int cpu)
  18658. -{
  18659. - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
  18660. -}
  18661. -
  18662. -static int rcu_cpu_kthread_should_run(unsigned int cpu)
  18663. -{
  18664. - return __this_cpu_read(rcu_cpu_has_work);
  18665. -}
  18666. -
  18667. -/*
  18668. - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  18669. - * RCU softirq used in flavors and configurations of RCU that do not
  18670. - * support RCU priority boosting.
  18671. - */
  18672. -static void rcu_cpu_kthread(unsigned int cpu)
  18673. -{
  18674. - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
  18675. - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
  18676. - int spincnt;
  18677. -
  18678. - for (spincnt = 0; spincnt < 10; spincnt++) {
  18679. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
  18680. - local_bh_disable();
  18681. - *statusp = RCU_KTHREAD_RUNNING;
  18682. - this_cpu_inc(rcu_cpu_kthread_loops);
  18683. - local_irq_disable();
  18684. - work = *workp;
  18685. - *workp = 0;
  18686. - local_irq_enable();
  18687. - if (work)
  18688. - rcu_kthread_do_work();
  18689. - local_bh_enable();
  18690. - if (*workp == 0) {
  18691. - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
  18692. - *statusp = RCU_KTHREAD_WAITING;
  18693. - return;
  18694. - }
  18695. - }
  18696. - *statusp = RCU_KTHREAD_YIELDING;
  18697. - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
  18698. - schedule_timeout_interruptible(2);
  18699. - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
  18700. - *statusp = RCU_KTHREAD_WAITING;
  18701. -}
  18702. -
  18703. /*
  18704. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  18705. * served by the rcu_node in question. The CPU hotplug lock is still
  18706. @@ -1286,26 +1195,12 @@
  18707. free_cpumask_var(cm);
  18708. }
  18709. -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
  18710. - .store = &rcu_cpu_kthread_task,
  18711. - .thread_should_run = rcu_cpu_kthread_should_run,
  18712. - .thread_fn = rcu_cpu_kthread,
  18713. - .thread_comm = "rcuc/%u",
  18714. - .setup = rcu_cpu_kthread_setup,
  18715. - .park = rcu_cpu_kthread_park,
  18716. -};
  18717. -
  18718. /*
  18719. * Spawn boost kthreads -- called as soon as the scheduler is running.
  18720. */
  18721. static void __init rcu_spawn_boost_kthreads(void)
  18722. {
  18723. struct rcu_node *rnp;
  18724. - int cpu;
  18725. -
  18726. - for_each_possible_cpu(cpu)
  18727. - per_cpu(rcu_cpu_has_work, cpu) = 0;
  18728. - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  18729. rcu_for_each_leaf_node(rcu_state_p, rnp)
  18730. (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
  18731. }
  18732. @@ -1328,11 +1223,6 @@
  18733. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  18734. }
  18735. -static void invoke_rcu_callbacks_kthread(void)
  18736. -{
  18737. - WARN_ON_ONCE(1);
  18738. -}
  18739. -
  18740. static bool rcu_is_callbacks_kthread(void)
  18741. {
  18742. return false;
  18743. @@ -1356,7 +1246,7 @@
  18744. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  18745. -#if !defined(CONFIG_RCU_FAST_NO_HZ)
  18746. +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL)
  18747. /*
  18748. * Check to see if any future RCU-related work will need to be done
  18749. @@ -1374,7 +1264,9 @@
  18750. return rcu_cpu_has_callbacks(NULL);
  18751. }
  18752. #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
  18753. +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */
  18754. +#if !defined(CONFIG_RCU_FAST_NO_HZ)
  18755. /*
  18756. * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
  18757. * after it.
  18758. @@ -1472,6 +1364,8 @@
  18759. return cbs_ready;
  18760. }
  18761. +#ifndef CONFIG_PREEMPT_RT_FULL
  18762. +
  18763. /*
  18764. * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
  18765. * to invoke. If the CPU has callbacks, try to advance them. Tell the
  18766. @@ -1512,7 +1406,7 @@
  18767. return 0;
  18768. }
  18769. #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
  18770. -
  18771. +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */
  18772. /*
  18773. * Prepare a CPU for idle from an RCU perspective. The first major task
  18774. * is to sense whether nohz mode has been enabled or disabled via sysfs.
  18775. @@ -1859,7 +1753,7 @@
  18776. */
  18777. static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
  18778. {
  18779. - wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
  18780. + swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
  18781. }
  18782. /*
  18783. @@ -1877,8 +1771,8 @@
  18784. static void rcu_init_one_nocb(struct rcu_node *rnp)
  18785. {
  18786. - init_waitqueue_head(&rnp->nocb_gp_wq[0]);
  18787. - init_waitqueue_head(&rnp->nocb_gp_wq[1]);
  18788. + init_swait_head(&rnp->nocb_gp_wq[0]);
  18789. + init_swait_head(&rnp->nocb_gp_wq[1]);
  18790. }
  18791. #ifndef CONFIG_RCU_NOCB_CPU_ALL
  18792. @@ -1903,7 +1797,7 @@
  18793. if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
  18794. /* Prior smp_mb__after_atomic() orders against prior enqueue. */
  18795. ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
  18796. - wake_up(&rdp_leader->nocb_wq);
  18797. + swait_wake(&rdp_leader->nocb_wq);
  18798. }
  18799. }
  18800. @@ -2116,7 +2010,7 @@
  18801. */
  18802. trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
  18803. for (;;) {
  18804. - wait_event_interruptible(
  18805. + swait_event_interruptible(
  18806. rnp->nocb_gp_wq[c & 0x1],
  18807. (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
  18808. if (likely(d))
  18809. @@ -2144,7 +2038,7 @@
  18810. /* Wait for callbacks to appear. */
  18811. if (!rcu_nocb_poll) {
  18812. trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
  18813. - wait_event_interruptible(my_rdp->nocb_wq,
  18814. + swait_event_interruptible(my_rdp->nocb_wq,
  18815. !ACCESS_ONCE(my_rdp->nocb_leader_sleep));
  18816. /* Memory barrier handled by smp_mb() calls below and repoll. */
  18817. } else if (firsttime) {
  18818. @@ -2219,7 +2113,7 @@
  18819. * List was empty, wake up the follower.
  18820. * Memory barriers supplied by atomic_long_add().
  18821. */
  18822. - wake_up(&rdp->nocb_wq);
  18823. + swait_wake(&rdp->nocb_wq);
  18824. }
  18825. }
  18826. @@ -2240,7 +2134,7 @@
  18827. if (!rcu_nocb_poll) {
  18828. trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
  18829. "FollowerSleep");
  18830. - wait_event_interruptible(rdp->nocb_wq,
  18831. + swait_event_interruptible(rdp->nocb_wq,
  18832. ACCESS_ONCE(rdp->nocb_follower_head));
  18833. } else if (firsttime) {
  18834. /* Don't drown trace log with "Poll"! */
  18835. @@ -2399,7 +2293,7 @@
  18836. static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
  18837. {
  18838. rdp->nocb_tail = &rdp->nocb_head;
  18839. - init_waitqueue_head(&rdp->nocb_wq);
  18840. + init_swait_head(&rdp->nocb_wq);
  18841. rdp->nocb_follower_tail = &rdp->nocb_follower_head;
  18842. }
  18843. diff -Nur linux-4.1.39.orig/kernel/rcu/update.c linux-4.1.39/kernel/rcu/update.c
  18844. --- linux-4.1.39.orig/kernel/rcu/update.c 2017-03-13 21:04:36.000000000 +0100
  18845. +++ linux-4.1.39/kernel/rcu/update.c 2017-04-18 17:56:30.629397751 +0200
  18846. @@ -227,6 +227,7 @@
  18847. }
  18848. EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  18849. +#ifndef CONFIG_PREEMPT_RT_FULL
  18850. /**
  18851. * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  18852. *
  18853. @@ -253,6 +254,7 @@
  18854. return in_softirq() || irqs_disabled();
  18855. }
  18856. EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
  18857. +#endif
  18858. #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  18859. diff -Nur linux-4.1.39.orig/kernel/relay.c linux-4.1.39/kernel/relay.c
  18860. --- linux-4.1.39.orig/kernel/relay.c 2017-03-13 21:04:36.000000000 +0100
  18861. +++ linux-4.1.39/kernel/relay.c 2017-04-18 17:56:30.629397751 +0200
  18862. @@ -339,6 +339,10 @@
  18863. {
  18864. struct rchan_buf *buf = (struct rchan_buf *)data;
  18865. wake_up_interruptible(&buf->read_wait);
  18866. + /*
  18867. + * Stupid polling for now:
  18868. + */
  18869. + mod_timer(&buf->timer, jiffies + 1);
  18870. }
  18871. /**
  18872. @@ -356,6 +360,7 @@
  18873. init_waitqueue_head(&buf->read_wait);
  18874. kref_init(&buf->kref);
  18875. setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf);
  18876. + mod_timer(&buf->timer, jiffies + 1);
  18877. } else
  18878. del_timer_sync(&buf->timer);
  18879. @@ -739,15 +744,6 @@
  18880. else
  18881. buf->early_bytes += buf->chan->subbuf_size -
  18882. buf->padding[old_subbuf];
  18883. - smp_mb();
  18884. - if (waitqueue_active(&buf->read_wait))
  18885. - /*
  18886. - * Calling wake_up_interruptible() from here
  18887. - * will deadlock if we happen to be logging
  18888. - * from the scheduler (trying to re-grab
  18889. - * rq->lock), so defer it.
  18890. - */
  18891. - mod_timer(&buf->timer, jiffies + 1);
  18892. }
  18893. old = buf->data;
  18894. diff -Nur linux-4.1.39.orig/kernel/sched/completion.c linux-4.1.39/kernel/sched/completion.c
  18895. --- linux-4.1.39.orig/kernel/sched/completion.c 2017-03-13 21:04:36.000000000 +0100
  18896. +++ linux-4.1.39/kernel/sched/completion.c 2017-04-18 17:56:30.633397907 +0200
  18897. @@ -30,10 +30,10 @@
  18898. {
  18899. unsigned long flags;
  18900. - spin_lock_irqsave(&x->wait.lock, flags);
  18901. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  18902. x->done++;
  18903. - __wake_up_locked(&x->wait, TASK_NORMAL, 1);
  18904. - spin_unlock_irqrestore(&x->wait.lock, flags);
  18905. + __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
  18906. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  18907. }
  18908. EXPORT_SYMBOL(complete);
  18909. @@ -50,10 +50,10 @@
  18910. {
  18911. unsigned long flags;
  18912. - spin_lock_irqsave(&x->wait.lock, flags);
  18913. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  18914. x->done += UINT_MAX/2;
  18915. - __wake_up_locked(&x->wait, TASK_NORMAL, 0);
  18916. - spin_unlock_irqrestore(&x->wait.lock, flags);
  18917. + __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
  18918. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  18919. }
  18920. EXPORT_SYMBOL(complete_all);
  18921. @@ -62,20 +62,20 @@
  18922. long (*action)(long), long timeout, int state)
  18923. {
  18924. if (!x->done) {
  18925. - DECLARE_WAITQUEUE(wait, current);
  18926. + DEFINE_SWAITER(wait);
  18927. - __add_wait_queue_tail_exclusive(&x->wait, &wait);
  18928. + swait_prepare_locked(&x->wait, &wait);
  18929. do {
  18930. if (signal_pending_state(state, current)) {
  18931. timeout = -ERESTARTSYS;
  18932. break;
  18933. }
  18934. __set_current_state(state);
  18935. - spin_unlock_irq(&x->wait.lock);
  18936. + raw_spin_unlock_irq(&x->wait.lock);
  18937. timeout = action(timeout);
  18938. - spin_lock_irq(&x->wait.lock);
  18939. + raw_spin_lock_irq(&x->wait.lock);
  18940. } while (!x->done && timeout);
  18941. - __remove_wait_queue(&x->wait, &wait);
  18942. + swait_finish_locked(&x->wait, &wait);
  18943. if (!x->done)
  18944. return timeout;
  18945. }
  18946. @@ -89,9 +89,9 @@
  18947. {
  18948. might_sleep();
  18949. - spin_lock_irq(&x->wait.lock);
  18950. + raw_spin_lock_irq(&x->wait.lock);
  18951. timeout = do_wait_for_common(x, action, timeout, state);
  18952. - spin_unlock_irq(&x->wait.lock);
  18953. + raw_spin_unlock_irq(&x->wait.lock);
  18954. return timeout;
  18955. }
  18956. @@ -277,12 +277,12 @@
  18957. if (!READ_ONCE(x->done))
  18958. return 0;
  18959. - spin_lock_irqsave(&x->wait.lock, flags);
  18960. + raw_spin_lock_irqsave(&x->wait.lock, flags);
  18961. if (!x->done)
  18962. ret = 0;
  18963. else
  18964. x->done--;
  18965. - spin_unlock_irqrestore(&x->wait.lock, flags);
  18966. + raw_spin_unlock_irqrestore(&x->wait.lock, flags);
  18967. return ret;
  18968. }
  18969. EXPORT_SYMBOL(try_wait_for_completion);
  18970. @@ -311,7 +311,7 @@
  18971. * after it's acquired the lock.
  18972. */
  18973. smp_rmb();
  18974. - spin_unlock_wait(&x->wait.lock);
  18975. + raw_spin_unlock_wait(&x->wait.lock);
  18976. return true;
  18977. }
  18978. EXPORT_SYMBOL(completion_done);
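
completion.c above moves completions onto a raw spinlock and a simple waitqueue so that complete() remains usable from truly atomic contexts on RT, while the wait side keeps the classic shape: register as a waiter, then loop "set state, drop lock, block, retake lock" until done or the timeout expires. Below is a condensed userspace model of the same API built on a mutex and condvar; the condvar stands in for the simple waitqueue, so this is illustrative only, not the kernel implementation.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t  wait;
	unsigned int    done;
};

#define COMPLETION_INIT { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 }

static void complete(struct completion *x)
{
	pthread_mutex_lock(&x->lock);
	x->done++;
	pthread_cond_signal(&x->wait);		/* wake one waiter */
	pthread_mutex_unlock(&x->lock);
}

static void wait_for_completion(struct completion *x)
{
	pthread_mutex_lock(&x->lock);
	while (!x->done)			/* like the !x->done loop above */
		pthread_cond_wait(&x->wait, &x->lock);
	x->done--;				/* consume one completion */
	pthread_mutex_unlock(&x->lock);
}

static struct completion work_done = COMPLETION_INIT;

static void *worker(void *arg)
{
	(void)arg;
	usleep(1000);				/* pretend to do work */
	complete(&work_done);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	wait_for_completion(&work_done);
	printf("worker finished\n");
	pthread_join(t, NULL);
	return 0;
}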
  18979. diff -Nur linux-4.1.39.orig/kernel/sched/core.c linux-4.1.39/kernel/sched/core.c
  18980. --- linux-4.1.39.orig/kernel/sched/core.c 2017-03-13 21:04:36.000000000 +0100
  18981. +++ linux-4.1.39/kernel/sched/core.c 2017-04-18 17:56:30.633397907 +0200
  18982. @@ -282,7 +282,11 @@
  18983. * Number of tasks to iterate in a single balance run.
  18984. * Limited because this is done with IRQs disabled.
  18985. */
  18986. +#ifndef CONFIG_PREEMPT_RT_FULL
  18987. const_debug unsigned int sysctl_sched_nr_migrate = 32;
  18988. +#else
  18989. +const_debug unsigned int sysctl_sched_nr_migrate = 8;
  18990. +#endif
  18991. /*
  18992. * period over which we average the RT time consumption, measured
  18993. @@ -461,6 +465,7 @@
  18994. hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  18995. rq->hrtick_timer.function = hrtick;
  18996. + rq->hrtick_timer.irqsafe = 1;
  18997. }
  18998. #else /* CONFIG_SCHED_HRTICK */
  18999. static inline void hrtick_clear(struct rq *rq)
  19000. @@ -541,6 +546,52 @@
  19001. #endif
  19002. #endif
  19003. +void wake_q_add(struct wake_q_head *head, struct task_struct *task)
  19004. +{
  19005. + struct wake_q_node *node = &task->wake_q;
  19006. +
  19007. + /*
  19008. + * Atomically grab the task, if ->wake_q is !nil already it means
  19009. + * its already queued (either by us or someone else) and will get the
  19010. + * wakeup due to that.
  19011. + *
  19012. + * This cmpxchg() implies a full barrier, which pairs with the write
  19013. + * barrier implied by the wakeup in wake_up_list().
  19014. + */
  19015. + if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
  19016. + return;
  19017. +
  19018. + get_task_struct(task);
  19019. +
  19020. + /*
  19021. + * The head is context local, there can be no concurrency.
  19022. + */
  19023. + *head->lastp = node;
  19024. + head->lastp = &node->next;
  19025. +}
  19026. +
  19027. +void wake_up_q(struct wake_q_head *head)
  19028. +{
  19029. + struct wake_q_node *node = head->first;
  19030. +
  19031. + while (node != WAKE_Q_TAIL) {
  19032. + struct task_struct *task;
  19033. +
  19034. + task = container_of(node, struct task_struct, wake_q);
  19035. + BUG_ON(!task);
  19036. + /* task can safely be re-inserted now */
  19037. + node = node->next;
  19038. + task->wake_q.next = NULL;
  19039. +
  19040. + /*
  19041. + * wake_up_process() implies a wmb() to pair with the queueing
  19042. + * in wake_q_add() so as not to miss wakeups.
  19043. + */
  19044. + wake_up_process(task);
  19045. + put_task_struct(task);
  19046. + }
  19047. +}
  19048. +
  19049. /*
  19050. * resched_curr - mark rq's current task 'to be rescheduled now'.
  19051. *
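
wake_q_add()/wake_up_q() above collect tasks to be woken while some lock is still held and perform the actual wake_up_process() calls only after that lock is dropped; the cmpxchg on wake_q.next guarantees each task is queued at most once. The C11 model below reproduces the list mechanics only: it omits the get_task_struct()/put_task_struct() pinning and prints instead of waking, and the container-of arithmetic is spelled out with offsetof.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define WAKE_Q_TAIL ((struct wake_q_node *) 0x1)

struct wake_q_node {
	_Atomic(struct wake_q_node *) next;
};

struct task {
	const char *name;
	struct wake_q_node wake_q;
};

struct wake_q_head {
	struct wake_q_node *first;
	struct wake_q_node *last;
};

/* Queue a task for a deferred wakeup; each task can be queued only once. */
static void wake_q_add(struct wake_q_head *head, struct task *t)
{
	struct wake_q_node *expected = NULL;

	/* Claim the node; fails if the task is already on someone's list. */
	if (!atomic_compare_exchange_strong(&t->wake_q.next, &expected,
					    WAKE_Q_TAIL))
		return;

	if (!head->first)
		head->first = &t->wake_q;
	else
		atomic_store(&head->last->next, &t->wake_q);
	head->last = &t->wake_q;
}

/* After the lock is dropped: walk the list and wake everything queued. */
static void wake_up_q(struct wake_q_head *head)
{
	struct wake_q_node *node = head->first;

	while (node) {
		struct task *t = (struct task *)
			((char *)node - offsetof(struct task, wake_q));
		struct wake_q_node *next = atomic_load(&node->next);

		atomic_store(&t->wake_q.next, NULL);	/* re-queueable again */
		printf("waking %s\n", t->name);		/* wake_up_process() here */
		node = (next == WAKE_Q_TAIL) ? NULL : next;
	}
	head->first = head->last = NULL;
}

int main(void)
{
	struct wake_q_head q = { NULL, NULL };
	struct task a = { .name = "A" }, b = { .name = "B" };

	wake_q_add(&q, &a);
	wake_q_add(&q, &b);
	wake_q_add(&q, &a);	/* duplicate add is ignored */
	wake_up_q(&q);
	return 0;
}

Expected output is "waking A" then "waking B"; the duplicate add of A is silently dropped, which is exactly the property the cmpxchg provides.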
  19052. @@ -572,6 +623,38 @@
  19053. trace_sched_wake_idle_without_ipi(cpu);
  19054. }
  19055. +#ifdef CONFIG_PREEMPT_LAZY
  19056. +void resched_curr_lazy(struct rq *rq)
  19057. +{
  19058. + struct task_struct *curr = rq->curr;
  19059. + int cpu;
  19060. +
  19061. + if (!sched_feat(PREEMPT_LAZY)) {
  19062. + resched_curr(rq);
  19063. + return;
  19064. + }
  19065. +
  19066. + lockdep_assert_held(&rq->lock);
  19067. +
  19068. + if (test_tsk_need_resched(curr))
  19069. + return;
  19070. +
  19071. + if (test_tsk_need_resched_lazy(curr))
  19072. + return;
  19073. +
  19074. + set_tsk_need_resched_lazy(curr);
  19075. +
  19076. + cpu = cpu_of(rq);
  19077. + if (cpu == smp_processor_id())
  19078. + return;
  19079. +
  19080. + /* NEED_RESCHED_LAZY must be visible before we test polling */
  19081. + smp_mb();
  19082. + if (!tsk_is_polling(curr))
  19083. + smp_send_reschedule(cpu);
  19084. +}
  19085. +#endif
  19086. +
  19087. void resched_cpu(int cpu)
  19088. {
  19089. struct rq *rq = cpu_rq(cpu);
  19090. @@ -595,12 +678,14 @@
  19091. */
  19092. int get_nohz_timer_target(int pinned)
  19093. {
  19094. - int cpu = smp_processor_id();
  19095. + int cpu;
  19096. int i;
  19097. struct sched_domain *sd;
  19098. + preempt_disable_rt();
  19099. + cpu = smp_processor_id();
  19100. if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
  19101. - return cpu;
  19102. + goto preempt_en_rt;
  19103. rcu_read_lock();
  19104. for_each_domain(cpu, sd) {
  19105. @@ -613,6 +698,8 @@
  19106. }
  19107. unlock:
  19108. rcu_read_unlock();
  19109. +preempt_en_rt:
  19110. + preempt_enable_rt();
  19111. return cpu;
  19112. }
  19113. /*
  19114. @@ -1164,6 +1251,18 @@
  19115. static int migration_cpu_stop(void *data);
  19116. +static bool check_task_state(struct task_struct *p, long match_state)
  19117. +{
  19118. + bool match = false;
  19119. +
  19120. + raw_spin_lock_irq(&p->pi_lock);
  19121. + if (p->state == match_state || p->saved_state == match_state)
  19122. + match = true;
  19123. + raw_spin_unlock_irq(&p->pi_lock);
  19124. +
  19125. + return match;
  19126. +}
  19127. +
  19128. /*
  19129. * wait_task_inactive - wait for a thread to unschedule.
  19130. *
  19131. @@ -1208,7 +1307,7 @@
  19132. * is actually now running somewhere else!
  19133. */
  19134. while (task_running(rq, p)) {
  19135. - if (match_state && unlikely(p->state != match_state))
  19136. + if (match_state && !check_task_state(p, match_state))
  19137. return 0;
  19138. cpu_relax();
  19139. }
  19140. @@ -1223,7 +1322,8 @@
  19141. running = task_running(rq, p);
  19142. queued = task_on_rq_queued(p);
  19143. ncsw = 0;
  19144. - if (!match_state || p->state == match_state)
  19145. + if (!match_state || p->state == match_state ||
  19146. + p->saved_state == match_state)
  19147. ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  19148. task_rq_unlock(rq, p, &flags);
  19149. @@ -1449,10 +1549,6 @@
  19150. {
  19151. activate_task(rq, p, en_flags);
  19152. p->on_rq = TASK_ON_RQ_QUEUED;
  19153. -
  19154. - /* if a worker is waking up, notify workqueue */
  19155. - if (p->flags & PF_WQ_WORKER)
  19156. - wq_worker_waking_up(p, cpu_of(rq));
  19157. }
  19158. /*
  19159. @@ -1462,9 +1558,9 @@
  19160. ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
  19161. {
  19162. check_preempt_curr(rq, p, wake_flags);
  19163. - trace_sched_wakeup(p, true);
  19164. -
  19165. p->state = TASK_RUNNING;
  19166. + trace_sched_wakeup(p);
  19167. +
  19168. #ifdef CONFIG_SMP
  19169. if (p->sched_class->task_woken)
  19170. p->sched_class->task_woken(rq, p);
  19171. @@ -1666,8 +1762,29 @@
  19172. */
  19173. smp_mb__before_spinlock();
  19174. raw_spin_lock_irqsave(&p->pi_lock, flags);
  19175. - if (!(p->state & state))
  19176. + if (!(p->state & state)) {
  19177. + /*
  19178. + * The task might be running due to a spinlock sleeper
  19179. + * wakeup. Check the saved state and set it to running
  19180. + * if the wakeup condition is true.
  19181. + */
  19182. + if (!(wake_flags & WF_LOCK_SLEEPER)) {
  19183. + if (p->saved_state & state) {
  19184. + p->saved_state = TASK_RUNNING;
  19185. + success = 1;
  19186. + }
  19187. + }
  19188. goto out;
  19189. + }
  19190. +
  19191. + /*
  19192. + * If this is a regular wakeup, then we can unconditionally
  19193. + * clear the saved state of a "lock sleeper".
  19194. + */
  19195. + if (!(wake_flags & WF_LOCK_SLEEPER))
  19196. + p->saved_state = TASK_RUNNING;
  19197. +
  19198. + trace_sched_waking(p);
  19199. success = 1; /* we're going to change ->state */
  19200. cpu = task_cpu(p);
  19201. @@ -1732,42 +1849,6 @@
  19202. }
  19203. /**
  19204. - * try_to_wake_up_local - try to wake up a local task with rq lock held
  19205. - * @p: the thread to be awakened
  19206. - *
  19207. - * Put @p on the run-queue if it's not already there. The caller must
  19208. - * ensure that this_rq() is locked, @p is bound to this_rq() and not
  19209. - * the current task.
  19210. - */
  19211. -static void try_to_wake_up_local(struct task_struct *p)
  19212. -{
  19213. - struct rq *rq = task_rq(p);
  19214. -
  19215. - if (WARN_ON_ONCE(rq != this_rq()) ||
  19216. - WARN_ON_ONCE(p == current))
  19217. - return;
  19218. -
  19219. - lockdep_assert_held(&rq->lock);
  19220. -
  19221. - if (!raw_spin_trylock(&p->pi_lock)) {
  19222. - raw_spin_unlock(&rq->lock);
  19223. - raw_spin_lock(&p->pi_lock);
  19224. - raw_spin_lock(&rq->lock);
  19225. - }
  19226. -
  19227. - if (!(p->state & TASK_NORMAL))
  19228. - goto out;
  19229. -
  19230. - if (!task_on_rq_queued(p))
  19231. - ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  19232. -
  19233. - ttwu_do_wakeup(rq, p, 0);
  19234. - ttwu_stat(p, smp_processor_id(), 0);
  19235. -out:
  19236. - raw_spin_unlock(&p->pi_lock);
  19237. -}
  19238. -
  19239. -/**
  19240. * wake_up_process - Wake up a specific process
  19241. * @p: The process to be woken up.
  19242. *
  19243. @@ -1781,11 +1862,23 @@
  19244. */
  19245. int wake_up_process(struct task_struct *p)
  19246. {
  19247. - WARN_ON(task_is_stopped_or_traced(p));
  19248. + WARN_ON(__task_is_stopped_or_traced(p));
  19249. return try_to_wake_up(p, TASK_NORMAL, 0);
  19250. }
  19251. EXPORT_SYMBOL(wake_up_process);
  19252. +/**
  19253. + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
  19254. + * @p: The process to be woken up.
  19255. + *
  19256. + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
  19257. + * the nature of the wakeup.
  19258. + */
  19259. +int wake_up_lock_sleeper(struct task_struct *p)
  19260. +{
  19261. + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
  19262. +}
  19263. +
  19264. int wake_up_state(struct task_struct *p, unsigned int state)
  19265. {
  19266. return try_to_wake_up(p, state, 0);
  19267. @@ -1981,6 +2074,9 @@
  19268. p->on_cpu = 0;
  19269. #endif
  19270. init_task_preempt_count(p);
  19271. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  19272. + task_thread_info(p)->preempt_lazy_count = 0;
  19273. +#endif
  19274. #ifdef CONFIG_SMP
  19275. plist_node_init(&p->pushable_tasks, MAX_PRIO);
  19276. RB_CLEAR_NODE(&p->pushable_dl_tasks);
  19277. @@ -2116,7 +2212,7 @@
  19278. rq = __task_rq_lock(p);
  19279. activate_task(rq, p, 0);
  19280. p->on_rq = TASK_ON_RQ_QUEUED;
  19281. - trace_sched_wakeup_new(p, true);
  19282. + trace_sched_wakeup_new(p);
  19283. check_preempt_curr(rq, p, WF_FORK);
  19284. #ifdef CONFIG_SMP
  19285. if (p->sched_class->task_woken)
  19286. @@ -2253,8 +2349,12 @@
  19287. finish_arch_post_lock_switch();
  19288. fire_sched_in_preempt_notifiers(current);
  19289. + /*
  19290. + * We use mmdrop_delayed() here so we don't have to do the
  19291. + * full __mmdrop() when we are the last user.
  19292. + */
  19293. if (mm)
  19294. - mmdrop(mm);
  19295. + mmdrop_delayed(mm);
  19296. if (unlikely(prev_state == TASK_DEAD)) {
  19297. if (prev->sched_class->task_dead)
  19298. prev->sched_class->task_dead(prev);
  19299. @@ -2565,16 +2665,6 @@
  19300. }
  19301. #endif
  19302. -notrace unsigned long get_parent_ip(unsigned long addr)
  19303. -{
  19304. - if (in_lock_functions(addr)) {
  19305. - addr = CALLER_ADDR2;
  19306. - if (in_lock_functions(addr))
  19307. - addr = CALLER_ADDR3;
  19308. - }
  19309. - return addr;
  19310. -}
  19311. -
  19312. #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
  19313. defined(CONFIG_PREEMPT_TRACER))
  19314. @@ -2596,7 +2686,7 @@
  19315. PREEMPT_MASK - 10);
  19316. #endif
  19317. if (preempt_count() == val) {
  19318. - unsigned long ip = get_parent_ip(CALLER_ADDR1);
  19319. + unsigned long ip = get_lock_parent_ip();
  19320. #ifdef CONFIG_DEBUG_PREEMPT
  19321. current->preempt_disable_ip = ip;
  19322. #endif
  19323. @@ -2623,7 +2713,7 @@
  19324. #endif
  19325. if (preempt_count() == val)
  19326. - trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
  19327. + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
  19328. __preempt_count_sub(val);
  19329. }
  19330. EXPORT_SYMBOL(preempt_count_sub);
  19331. @@ -2679,6 +2769,133 @@
  19332. schedstat_inc(this_rq(), sched_count);
  19333. }
  19334. +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
  19335. +#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
  19336. +#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
  19337. +#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
  19338. +
  19339. +static inline void update_migrate_disable(struct task_struct *p)
  19340. +{
  19341. + const struct cpumask *mask;
  19342. +
  19343. + if (likely(!p->migrate_disable))
  19344. + return;
  19345. +
  19346. + /* Did we already update affinity? */
  19347. + if (unlikely(migrate_disabled_updated(p)))
  19348. + return;
  19349. +
  19350. + /*
  19351. + * Since this is always current we can get away with only locking
  19352. + * rq->lock, the ->cpus_allowed value can normally only be changed
  19353. + * while holding both p->pi_lock and rq->lock, but seeing that this
  19354. + * is current, we cannot actually be waking up, so all code that
  19355. + * relies on serialization against p->pi_lock is out of scope.
  19356. + *
  19357. + * Having rq->lock serializes us against things like
  19358. + * set_cpus_allowed_ptr() that can still happen concurrently.
  19359. + */
  19360. + mask = tsk_cpus_allowed(p);
  19361. +
  19362. + if (p->sched_class->set_cpus_allowed)
  19363. + p->sched_class->set_cpus_allowed(p, mask);
  19364. + /* mask==cpumask_of(task_cpu(p)) which has a cpumask_weight==1 */
  19365. + p->nr_cpus_allowed = 1;
  19366. +
  19367. + /* Let migrate_enable know to fix things back up */
  19368. + p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
  19369. +}
  19370. +
  19371. +void migrate_disable(void)
  19372. +{
  19373. + struct task_struct *p = current;
  19374. +
  19375. + if (in_atomic() || irqs_disabled()) {
  19376. +#ifdef CONFIG_SCHED_DEBUG
  19377. + p->migrate_disable_atomic++;
  19378. +#endif
  19379. + return;
  19380. + }
  19381. +
  19382. +#ifdef CONFIG_SCHED_DEBUG
  19383. + if (unlikely(p->migrate_disable_atomic)) {
  19384. + tracing_off();
  19385. + WARN_ON_ONCE(1);
  19386. + }
  19387. +#endif
  19388. +
  19389. + if (p->migrate_disable) {
  19390. + p->migrate_disable++;
  19391. + return;
  19392. + }
  19393. +
  19394. + preempt_disable();
  19395. + preempt_lazy_disable();
  19396. + pin_current_cpu();
  19397. + p->migrate_disable = 1;
  19398. + preempt_enable();
  19399. +}
  19400. +EXPORT_SYMBOL(migrate_disable);
  19401. +
  19402. +void migrate_enable(void)
  19403. +{
  19404. + struct task_struct *p = current;
  19405. + const struct cpumask *mask;
  19406. + unsigned long flags;
  19407. + struct rq *rq;
  19408. +
  19409. + if (in_atomic() || irqs_disabled()) {
  19410. +#ifdef CONFIG_SCHED_DEBUG
  19411. + p->migrate_disable_atomic--;
  19412. +#endif
  19413. + return;
  19414. + }
  19415. +
  19416. +#ifdef CONFIG_SCHED_DEBUG
  19417. + if (unlikely(p->migrate_disable_atomic)) {
  19418. + tracing_off();
  19419. + WARN_ON_ONCE(1);
  19420. + }
  19421. +#endif
  19422. + WARN_ON_ONCE(p->migrate_disable <= 0);
  19423. +
  19424. + if (migrate_disable_count(p) > 1) {
  19425. + p->migrate_disable--;
  19426. + return;
  19427. + }
  19428. +
  19429. + preempt_disable();
  19430. + if (unlikely(migrate_disabled_updated(p))) {
  19431. + /*
19432. + * Undo whatever update_migrate_disable() did; see the comment
19433. + * there about locking.
  19434. + */
  19435. + rq = this_rq();
  19436. + raw_spin_lock_irqsave(&rq->lock, flags);
  19437. +
  19438. + /*
  19439. + * Clearing migrate_disable causes tsk_cpus_allowed to
19440. + * show the task's original CPU affinity.
  19441. + */
  19442. + p->migrate_disable = 0;
  19443. + mask = tsk_cpus_allowed(p);
  19444. + if (p->sched_class->set_cpus_allowed)
  19445. + p->sched_class->set_cpus_allowed(p, mask);
  19446. + p->nr_cpus_allowed = cpumask_weight(mask);
  19447. + raw_spin_unlock_irqrestore(&rq->lock, flags);
  19448. + } else
  19449. + p->migrate_disable = 0;
  19450. +
  19451. + unpin_current_cpu();
  19452. + preempt_enable();
  19453. + preempt_lazy_enable();
  19454. +}
  19455. +EXPORT_SYMBOL(migrate_enable);
  19456. +#else
  19457. +static inline void update_migrate_disable(struct task_struct *p) { }
  19458. +#define migrate_disabled_updated(p) 0
  19459. +#endif
  19460. +
  19461. /*
  19462. * Pick up the highest-prio task:
  19463. */
  19464. @@ -2785,6 +3002,8 @@
  19465. smp_mb__before_spinlock();
  19466. raw_spin_lock_irq(&rq->lock);
  19467. + update_migrate_disable(prev);
  19468. +
  19469. rq->clock_skip_update <<= 1; /* promote REQ to ACT */
  19470. switch_count = &prev->nivcsw;
  19471. @@ -2794,19 +3013,6 @@
  19472. } else {
  19473. deactivate_task(rq, prev, DEQUEUE_SLEEP);
  19474. prev->on_rq = 0;
  19475. -
  19476. - /*
  19477. - * If a worker went to sleep, notify and ask workqueue
  19478. - * whether it wants to wake up a task to maintain
  19479. - * concurrency.
  19480. - */
  19481. - if (prev->flags & PF_WQ_WORKER) {
  19482. - struct task_struct *to_wakeup;
  19483. -
  19484. - to_wakeup = wq_worker_sleeping(prev, cpu);
  19485. - if (to_wakeup)
  19486. - try_to_wake_up_local(to_wakeup);
  19487. - }
  19488. }
  19489. switch_count = &prev->nvcsw;
  19490. }
  19491. @@ -2816,6 +3022,7 @@
  19492. next = pick_next_task(rq, prev);
  19493. clear_tsk_need_resched(prev);
  19494. + clear_tsk_need_resched_lazy(prev);
  19495. clear_preempt_need_resched();
  19496. rq->clock_skip_update = 0;
  19497. @@ -2836,8 +3043,19 @@
  19498. static inline void sched_submit_work(struct task_struct *tsk)
  19499. {
  19500. - if (!tsk->state || tsk_is_pi_blocked(tsk))
  19501. + if (!tsk->state)
  19502. + return;
  19503. + /*
  19504. + * If a worker went to sleep, notify and ask workqueue whether
  19505. + * it wants to wake up a task to maintain concurrency.
  19506. + */
  19507. + if (tsk->flags & PF_WQ_WORKER)
  19508. + wq_worker_sleeping(tsk);
  19509. +
  19510. +
  19511. + if (tsk_is_pi_blocked(tsk))
  19512. return;
  19513. +
  19514. /*
  19515. * If we are going to sleep and we have plugged IO queued,
  19516. * make sure to submit it to avoid deadlocks.
  19517. @@ -2846,6 +3064,12 @@
  19518. blk_schedule_flush_plug(tsk);
  19519. }
  19520. +static void sched_update_worker(struct task_struct *tsk)
  19521. +{
  19522. + if (tsk->flags & PF_WQ_WORKER)
  19523. + wq_worker_running(tsk);
  19524. +}
  19525. +
  19526. asmlinkage __visible void __sched schedule(void)
  19527. {
  19528. struct task_struct *tsk = current;
  19529. @@ -2854,6 +3078,7 @@
  19530. do {
  19531. __schedule();
  19532. } while (need_resched());
  19533. + sched_update_worker(tsk);
  19534. }
  19535. EXPORT_SYMBOL(schedule);
  19536. @@ -2903,6 +3128,30 @@
  19537. } while (need_resched());
  19538. }
  19539. +#ifdef CONFIG_PREEMPT_LAZY
  19540. +/*
19541. + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since
19542. + * it was set by an RT task. Otherwise we try to avoid being scheduled
19543. + * out as long as the preempt_lazy_count counter is > 0.
  19544. + */
  19545. +static __always_inline int preemptible_lazy(void)
  19546. +{
  19547. + if (test_thread_flag(TIF_NEED_RESCHED))
  19548. + return 1;
  19549. + if (current_thread_info()->preempt_lazy_count)
  19550. + return 0;
  19551. + return 1;
  19552. +}
  19553. +
  19554. +#else
  19555. +
  19556. +static inline int preemptible_lazy(void)
  19557. +{
  19558. + return 1;
  19559. +}
  19560. +
  19561. +#endif
  19562. +
  19563. #ifdef CONFIG_PREEMPT
  19564. /*
  19565. * this is the entry point to schedule() from in-kernel preemption
  19566. @@ -2917,6 +3166,8 @@
  19567. */
  19568. if (likely(!preemptible()))
  19569. return;
  19570. + if (!preemptible_lazy())
  19571. + return;
  19572. preempt_schedule_common();
  19573. }
  19574. @@ -2944,6 +3195,8 @@
  19575. if (likely(!preemptible()))
  19576. return;
  19577. + if (!preemptible_lazy())
  19578. + return;
  19579. do {
  19580. __preempt_count_add(PREEMPT_ACTIVE);
  19581. @@ -2953,7 +3206,16 @@
  19582. * an infinite recursion.
  19583. */
  19584. prev_ctx = exception_enter();
  19585. + /*
  19586. + * The add/subtract must not be traced by the function
  19587. + * tracer. But we still want to account for the
  19588. + * preempt off latency tracer. Since the _notrace versions
19589. + * of add/subtract skip the accounting for the latency tracer,
  19590. + * we must force it manually.
  19591. + */
  19592. + start_critical_timings();
  19593. __schedule();
  19594. + stop_critical_timings();
  19595. exception_exit(prev_ctx);
  19596. __preempt_count_sub(PREEMPT_ACTIVE);
  19597. @@ -4290,6 +4552,7 @@
  19598. }
  19599. EXPORT_SYMBOL(__cond_resched_lock);
  19600. +#ifndef CONFIG_PREEMPT_RT_FULL
  19601. int __sched __cond_resched_softirq(void)
  19602. {
  19603. BUG_ON(!in_softirq());
  19604. @@ -4303,6 +4566,7 @@
  19605. return 0;
  19606. }
  19607. EXPORT_SYMBOL(__cond_resched_softirq);
  19608. +#endif
  19609. /**
  19610. * yield - yield the current processor to other threads.
  19611. @@ -4659,7 +4923,9 @@
  19612. /* Set the preempt count _outside_ the spinlocks! */
  19613. init_idle_preempt_count(idle, cpu);
  19614. -
  19615. +#ifdef CONFIG_HAVE_PREEMPT_LAZY
  19616. + task_thread_info(idle)->preempt_lazy_count = 0;
  19617. +#endif
  19618. /*
  19619. * The idle tasks have their own, simple scheduling class:
  19620. */
  19621. @@ -4779,11 +5045,91 @@
  19622. void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  19623. {
  19624. - if (p->sched_class->set_cpus_allowed)
  19625. - p->sched_class->set_cpus_allowed(p, new_mask);
  19626. + if (!migrate_disabled_updated(p)) {
  19627. + if (p->sched_class->set_cpus_allowed)
  19628. + p->sched_class->set_cpus_allowed(p, new_mask);
  19629. + p->nr_cpus_allowed = cpumask_weight(new_mask);
  19630. + }
  19631. cpumask_copy(&p->cpus_allowed, new_mask);
  19632. - p->nr_cpus_allowed = cpumask_weight(new_mask);
  19633. +}
  19634. +
  19635. +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
  19636. +static DEFINE_MUTEX(sched_down_mutex);
  19637. +static cpumask_t sched_down_cpumask;
  19638. +
  19639. +void tell_sched_cpu_down_begin(int cpu)
  19640. +{
  19641. + mutex_lock(&sched_down_mutex);
  19642. + cpumask_set_cpu(cpu, &sched_down_cpumask);
  19643. + mutex_unlock(&sched_down_mutex);
  19644. +}
  19645. +
  19646. +void tell_sched_cpu_down_done(int cpu)
  19647. +{
  19648. + mutex_lock(&sched_down_mutex);
  19649. + cpumask_clear_cpu(cpu, &sched_down_cpumask);
  19650. + mutex_unlock(&sched_down_mutex);
  19651. +}
  19652. +
  19653. +/**
  19654. + * migrate_me - try to move the current task off this cpu
  19655. + *
  19656. + * Used by the pin_current_cpu() code to try to get tasks
  19657. + * to move off the current CPU as it is going down.
  19658. + * It will only move the task if the task isn't pinned to
  19659. + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
19660. + * and the task is in the RUNNING state. Otherwise moving
19661. + * the task would wake it up (change its state to running)
19662. + * when the task did not expect that.
19663. + *
19664. + * Returns 1 if it succeeded in moving the current task,
19665. + * 0 otherwise.
  19666. + */
  19667. +int migrate_me(void)
  19668. +{
  19669. + struct task_struct *p = current;
  19670. + struct migration_arg arg;
  19671. + struct cpumask *cpumask;
  19672. + struct cpumask *mask;
  19673. + unsigned long flags;
  19674. + unsigned int dest_cpu;
  19675. + struct rq *rq;
  19676. +
  19677. + /*
19678. + * We cannot migrate tasks bound to a CPU or tasks that are
19679. + * not running. Moving the task would wake it up.
  19680. + */
  19681. + if (p->flags & PF_NO_SETAFFINITY || p->state)
  19682. + return 0;
  19683. +
  19684. + mutex_lock(&sched_down_mutex);
  19685. + rq = task_rq_lock(p, &flags);
  19686. +
  19687. + cpumask = this_cpu_ptr(&sched_cpumasks);
  19688. + mask = &p->cpus_allowed;
  19689. +
  19690. + cpumask_andnot(cpumask, mask, &sched_down_cpumask);
  19691. +
  19692. + if (!cpumask_weight(cpumask)) {
  19693. + /* It's only on this CPU? */
  19694. + task_rq_unlock(rq, p, &flags);
  19695. + mutex_unlock(&sched_down_mutex);
  19696. + return 0;
  19697. + }
  19698. +
  19699. + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
  19700. +
  19701. + arg.task = p;
  19702. + arg.dest_cpu = dest_cpu;
  19703. +
  19704. + task_rq_unlock(rq, p, &flags);
  19705. +
  19706. + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
  19707. + tlb_migrate_finish(p->mm);
  19708. + mutex_unlock(&sched_down_mutex);
  19709. +
  19710. + return 1;
  19711. }
  19712. /*
  19713. @@ -4829,7 +5175,7 @@
  19714. do_set_cpus_allowed(p, new_mask);
  19715. /* Can the task run on the task's current CPU? If so, we're done */
  19716. - if (cpumask_test_cpu(task_cpu(p), new_mask))
  19717. + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
  19718. goto out;
  19719. dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
  19720. @@ -4969,6 +5315,8 @@
  19721. #ifdef CONFIG_HOTPLUG_CPU
  19722. +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
  19723. +
  19724. /*
  19725. * Ensures that the idle task is using init_mm right before its cpu goes
  19726. * offline.
  19727. @@ -4983,7 +5331,11 @@
  19728. switch_mm(mm, &init_mm, current);
  19729. finish_arch_post_lock_switch();
  19730. }
  19731. - mmdrop(mm);
  19732. + /*
19733. + * Defer the cleanup to a live CPU. On RT we can neither
  19734. + * call mmdrop() nor mmdrop_delayed() from here.
  19735. + */
  19736. + per_cpu(idle_last_mm, smp_processor_id()) = mm;
  19737. }
  19738. /*
  19739. @@ -5326,6 +5678,10 @@
  19740. case CPU_DEAD:
  19741. calc_load_migrate(rq);
  19742. + if (per_cpu(idle_last_mm, cpu)) {
  19743. + mmdrop(per_cpu(idle_last_mm, cpu));
  19744. + per_cpu(idle_last_mm, cpu) = NULL;
  19745. + }
  19746. break;
  19747. #endif
  19748. }
  19749. @@ -7305,7 +7661,8 @@
  19750. #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
  19751. static inline int preempt_count_equals(int preempt_offset)
  19752. {
  19753. - int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
  19754. + int nested = (preempt_count() & ~PREEMPT_ACTIVE) +
  19755. + sched_rcu_preempt_depth();
  19756. return (nested == preempt_offset);
  19757. }
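
The migrate_disable()/migrate_enable() pair added above pins the current task to its CPU while leaving preemption enabled; nested calls only bump p->migrate_disable, and the affinity fix-up in migrate_enable() runs when the count drops back to zero. A minimal usage sketch, with my_request and my_queue_for_cpu() as hypothetical placeholders that are not part of this patch:

    /* Sketch only: struct my_request and my_queue_for_cpu() are hypothetical. */
    static void submit_on_this_cpu(struct my_request *req)
    {
            int cpu;

            migrate_disable();              /* stay on this CPU, preemption stays enabled */
            cpu = raw_smp_processor_id();   /* stable until migrate_enable() */
            my_queue_for_cpu(cpu, req);     /* may sleep, e.g. on an RT "sleeping" spinlock */
            migrate_enable();
    }
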
  19758. diff -Nur linux-4.1.39.orig/kernel/sched/cputime.c linux-4.1.39/kernel/sched/cputime.c
  19759. --- linux-4.1.39.orig/kernel/sched/cputime.c 2017-03-13 21:04:36.000000000 +0100
  19760. +++ linux-4.1.39/kernel/sched/cputime.c 2017-04-18 17:56:30.633397907 +0200
  19761. @@ -675,37 +675,45 @@
  19762. void vtime_account_system(struct task_struct *tsk)
  19763. {
  19764. - write_seqlock(&tsk->vtime_seqlock);
  19765. + raw_spin_lock(&tsk->vtime_lock);
  19766. + write_seqcount_begin(&tsk->vtime_seq);
  19767. __vtime_account_system(tsk);
  19768. - write_sequnlock(&tsk->vtime_seqlock);
  19769. + write_seqcount_end(&tsk->vtime_seq);
  19770. + raw_spin_unlock(&tsk->vtime_lock);
  19771. }
  19772. void vtime_gen_account_irq_exit(struct task_struct *tsk)
  19773. {
  19774. - write_seqlock(&tsk->vtime_seqlock);
  19775. + raw_spin_lock(&tsk->vtime_lock);
  19776. + write_seqcount_begin(&tsk->vtime_seq);
  19777. __vtime_account_system(tsk);
  19778. if (context_tracking_in_user())
  19779. tsk->vtime_snap_whence = VTIME_USER;
  19780. - write_sequnlock(&tsk->vtime_seqlock);
  19781. + write_seqcount_end(&tsk->vtime_seq);
  19782. + raw_spin_unlock(&tsk->vtime_lock);
  19783. }
  19784. void vtime_account_user(struct task_struct *tsk)
  19785. {
  19786. cputime_t delta_cpu;
  19787. - write_seqlock(&tsk->vtime_seqlock);
  19788. + raw_spin_lock(&tsk->vtime_lock);
  19789. + write_seqcount_begin(&tsk->vtime_seq);
  19790. delta_cpu = get_vtime_delta(tsk);
  19791. tsk->vtime_snap_whence = VTIME_SYS;
  19792. account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
  19793. - write_sequnlock(&tsk->vtime_seqlock);
  19794. + write_seqcount_end(&tsk->vtime_seq);
  19795. + raw_spin_unlock(&tsk->vtime_lock);
  19796. }
  19797. void vtime_user_enter(struct task_struct *tsk)
  19798. {
  19799. - write_seqlock(&tsk->vtime_seqlock);
  19800. + raw_spin_lock(&tsk->vtime_lock);
  19801. + write_seqcount_begin(&tsk->vtime_seq);
  19802. __vtime_account_system(tsk);
  19803. tsk->vtime_snap_whence = VTIME_USER;
  19804. - write_sequnlock(&tsk->vtime_seqlock);
  19805. + write_seqcount_end(&tsk->vtime_seq);
  19806. + raw_spin_unlock(&tsk->vtime_lock);
  19807. }
  19808. void vtime_guest_enter(struct task_struct *tsk)
  19809. @@ -717,19 +725,23 @@
  19810. * synchronization against the reader (task_gtime())
  19811. * that can thus safely catch up with a tickless delta.
  19812. */
  19813. - write_seqlock(&tsk->vtime_seqlock);
  19814. + raw_spin_lock(&tsk->vtime_lock);
  19815. + write_seqcount_begin(&tsk->vtime_seq);
  19816. __vtime_account_system(tsk);
  19817. current->flags |= PF_VCPU;
  19818. - write_sequnlock(&tsk->vtime_seqlock);
  19819. + write_seqcount_end(&tsk->vtime_seq);
  19820. + raw_spin_unlock(&tsk->vtime_lock);
  19821. }
  19822. EXPORT_SYMBOL_GPL(vtime_guest_enter);
  19823. void vtime_guest_exit(struct task_struct *tsk)
  19824. {
  19825. - write_seqlock(&tsk->vtime_seqlock);
  19826. + raw_spin_lock(&tsk->vtime_lock);
  19827. + write_seqcount_begin(&tsk->vtime_seq);
  19828. __vtime_account_system(tsk);
  19829. current->flags &= ~PF_VCPU;
  19830. - write_sequnlock(&tsk->vtime_seqlock);
  19831. + write_seqcount_end(&tsk->vtime_seq);
  19832. + raw_spin_unlock(&tsk->vtime_lock);
  19833. }
  19834. EXPORT_SYMBOL_GPL(vtime_guest_exit);
  19835. @@ -742,24 +754,30 @@
  19836. void arch_vtime_task_switch(struct task_struct *prev)
  19837. {
  19838. - write_seqlock(&prev->vtime_seqlock);
  19839. + raw_spin_lock(&prev->vtime_lock);
  19840. + write_seqcount_begin(&prev->vtime_seq);
  19841. prev->vtime_snap_whence = VTIME_SLEEPING;
  19842. - write_sequnlock(&prev->vtime_seqlock);
  19843. + write_seqcount_end(&prev->vtime_seq);
  19844. + raw_spin_unlock(&prev->vtime_lock);
  19845. - write_seqlock(&current->vtime_seqlock);
  19846. + raw_spin_lock(&current->vtime_lock);
  19847. + write_seqcount_begin(&current->vtime_seq);
  19848. current->vtime_snap_whence = VTIME_SYS;
  19849. current->vtime_snap = sched_clock_cpu(smp_processor_id());
  19850. - write_sequnlock(&current->vtime_seqlock);
  19851. + write_seqcount_end(&current->vtime_seq);
  19852. + raw_spin_unlock(&current->vtime_lock);
  19853. }
  19854. void vtime_init_idle(struct task_struct *t, int cpu)
  19855. {
  19856. unsigned long flags;
  19857. - write_seqlock_irqsave(&t->vtime_seqlock, flags);
  19858. + raw_spin_lock_irqsave(&t->vtime_lock, flags);
  19859. + write_seqcount_begin(&t->vtime_seq);
  19860. t->vtime_snap_whence = VTIME_SYS;
  19861. t->vtime_snap = sched_clock_cpu(cpu);
  19862. - write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
  19863. + write_seqcount_end(&t->vtime_seq);
  19864. + raw_spin_unlock_irqrestore(&t->vtime_lock, flags);
  19865. }
  19866. cputime_t task_gtime(struct task_struct *t)
  19867. @@ -768,13 +786,13 @@
  19868. cputime_t gtime;
  19869. do {
  19870. - seq = read_seqbegin(&t->vtime_seqlock);
  19871. + seq = read_seqcount_begin(&t->vtime_seq);
  19872. gtime = t->gtime;
  19873. if (t->flags & PF_VCPU)
  19874. gtime += vtime_delta(t);
  19875. - } while (read_seqretry(&t->vtime_seqlock, seq));
  19876. + } while (read_seqcount_retry(&t->vtime_seq, seq));
  19877. return gtime;
  19878. }
  19879. @@ -797,7 +815,7 @@
  19880. *udelta = 0;
  19881. *sdelta = 0;
  19882. - seq = read_seqbegin(&t->vtime_seqlock);
  19883. + seq = read_seqcount_begin(&t->vtime_seq);
  19884. if (u_dst)
  19885. *u_dst = *u_src;
  19886. @@ -821,7 +839,7 @@
  19887. if (t->vtime_snap_whence == VTIME_SYS)
  19888. *sdelta = delta;
  19889. }
  19890. - } while (read_seqretry(&t->vtime_seqlock, seq));
  19891. + } while (read_seqcount_retry(&t->vtime_seq, seq));
  19892. }
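
The vtime conversion above trades the old seqlock for a raw spinlock plus a bare seqcount, so the write side keeps a non-sleeping lock on PREEMPT_RT while readers stay lockless. A generic sketch of the same pattern for a hypothetical my_stats structure (initialize with raw_spin_lock_init() and seqcount_init() before use):

    struct my_stats {                       /* hypothetical */
            raw_spinlock_t  lock;           /* serializes writers, never sleeps */
            seqcount_t      seq;            /* lets readers detect concurrent updates */
            u64             a, b;
    };

    static void my_stats_update(struct my_stats *s, u64 a, u64 b)
    {
            raw_spin_lock(&s->lock);
            write_seqcount_begin(&s->seq);
            s->a = a;
            s->b = b;
            write_seqcount_end(&s->seq);
            raw_spin_unlock(&s->lock);
    }

    static u64 my_stats_sum(struct my_stats *s)
    {
            unsigned int seq;
            u64 sum;

            do {
                    seq = read_seqcount_begin(&s->seq);
                    sum = s->a + s->b;
            } while (read_seqcount_retry(&s->seq, seq));

            return sum;
    }
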
  19893. diff -Nur linux-4.1.39.orig/kernel/sched/deadline.c linux-4.1.39/kernel/sched/deadline.c
  19894. --- linux-4.1.39.orig/kernel/sched/deadline.c 2017-03-13 21:04:36.000000000 +0100
  19895. +++ linux-4.1.39/kernel/sched/deadline.c 2017-04-18 17:56:30.633397907 +0200
  19896. @@ -637,6 +637,7 @@
  19897. hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  19898. timer->function = dl_task_timer;
  19899. + timer->irqsafe = 1;
  19900. }
  19901. static
  19902. diff -Nur linux-4.1.39.orig/kernel/sched/debug.c linux-4.1.39/kernel/sched/debug.c
  19903. --- linux-4.1.39.orig/kernel/sched/debug.c 2017-03-13 21:04:36.000000000 +0100
  19904. +++ linux-4.1.39/kernel/sched/debug.c 2017-04-18 17:56:30.633397907 +0200
  19905. @@ -260,6 +260,9 @@
  19906. P(rt_throttled);
  19907. PN(rt_time);
  19908. PN(rt_runtime);
  19909. +#ifdef CONFIG_SMP
  19910. + P(rt_nr_migratory);
  19911. +#endif
  19912. #undef PN
  19913. #undef P
  19914. @@ -648,6 +651,10 @@
  19915. #endif
  19916. P(policy);
  19917. P(prio);
  19918. +#ifdef CONFIG_PREEMPT_RT_FULL
  19919. + P(migrate_disable);
  19920. +#endif
  19921. + P(nr_cpus_allowed);
  19922. #undef PN
  19923. #undef __PN
  19924. #undef P
  19925. diff -Nur linux-4.1.39.orig/kernel/sched/fair.c linux-4.1.39/kernel/sched/fair.c
  19926. --- linux-4.1.39.orig/kernel/sched/fair.c 2017-03-13 21:04:36.000000000 +0100
  19927. +++ linux-4.1.39/kernel/sched/fair.c 2017-04-18 17:56:30.637398061 +0200
  19928. @@ -3201,7 +3201,7 @@
  19929. ideal_runtime = sched_slice(cfs_rq, curr);
  19930. delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  19931. if (delta_exec > ideal_runtime) {
  19932. - resched_curr(rq_of(cfs_rq));
  19933. + resched_curr_lazy(rq_of(cfs_rq));
  19934. /*
  19935. * The current task ran long enough, ensure it doesn't get
  19936. * re-elected due to buddy favours.
  19937. @@ -3225,7 +3225,7 @@
  19938. return;
  19939. if (delta > ideal_runtime)
  19940. - resched_curr(rq_of(cfs_rq));
  19941. + resched_curr_lazy(rq_of(cfs_rq));
  19942. }
  19943. static void
  19944. @@ -3366,7 +3366,7 @@
  19945. * validating it and just reschedule.
  19946. */
  19947. if (queued) {
  19948. - resched_curr(rq_of(cfs_rq));
  19949. + resched_curr_lazy(rq_of(cfs_rq));
  19950. return;
  19951. }
  19952. /*
  19953. @@ -3557,7 +3557,7 @@
  19954. * hierarchy can be throttled
  19955. */
  19956. if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
  19957. - resched_curr(rq_of(cfs_rq));
  19958. + resched_curr_lazy(rq_of(cfs_rq));
  19959. }
  19960. static __always_inline
  19961. @@ -4180,7 +4180,7 @@
  19962. if (delta < 0) {
  19963. if (rq->curr == p)
  19964. - resched_curr(rq);
  19965. + resched_curr_lazy(rq);
  19966. return;
  19967. }
  19968. hrtick_start(rq, delta);
  19969. @@ -5076,7 +5076,7 @@
  19970. return;
  19971. preempt:
  19972. - resched_curr(rq);
  19973. + resched_curr_lazy(rq);
  19974. /*
  19975. * Only set the backward buddy when the current task is still
  19976. * on the rq. This can happen when a wakeup gets interleaved
  19977. @@ -7869,7 +7869,7 @@
  19978. * 'current' within the tree based on its new key value.
  19979. */
  19980. swap(curr->vruntime, se->vruntime);
  19981. - resched_curr(rq);
  19982. + resched_curr_lazy(rq);
  19983. }
  19984. se->vruntime -= cfs_rq->min_vruntime;
  19985. @@ -7894,7 +7894,7 @@
  19986. */
  19987. if (rq->curr == p) {
  19988. if (p->prio > oldprio)
  19989. - resched_curr(rq);
  19990. + resched_curr_lazy(rq);
  19991. } else
  19992. check_preempt_curr(rq, p, 0);
  19993. }
  19994. diff -Nur linux-4.1.39.orig/kernel/sched/features.h linux-4.1.39/kernel/sched/features.h
  19995. --- linux-4.1.39.orig/kernel/sched/features.h 2017-03-13 21:04:36.000000000 +0100
  19996. +++ linux-4.1.39/kernel/sched/features.h 2017-04-18 17:56:30.637398061 +0200
  19997. @@ -50,11 +50,19 @@
  19998. */
  19999. SCHED_FEAT(NONTASK_CAPACITY, true)
  20000. +#ifdef CONFIG_PREEMPT_RT_FULL
  20001. +SCHED_FEAT(TTWU_QUEUE, false)
  20002. +# ifdef CONFIG_PREEMPT_LAZY
  20003. +SCHED_FEAT(PREEMPT_LAZY, true)
  20004. +# endif
  20005. +#else
  20006. +
  20007. /*
  20008. * Queue remote wakeups on the target CPU and process them
  20009. * using the scheduler IPI. Reduces rq->lock contention/bounces.
  20010. */
  20011. SCHED_FEAT(TTWU_QUEUE, true)
  20012. +#endif
  20013. #ifdef HAVE_RT_PUSH_IPI
  20014. /*
  20015. diff -Nur linux-4.1.39.orig/kernel/sched/Makefile linux-4.1.39/kernel/sched/Makefile
  20016. --- linux-4.1.39.orig/kernel/sched/Makefile 2017-03-13 21:04:36.000000000 +0100
  20017. +++ linux-4.1.39/kernel/sched/Makefile 2017-04-18 17:56:30.633397907 +0200
  20018. @@ -13,7 +13,7 @@
  20019. obj-y += core.o proc.o clock.o cputime.o
  20020. obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
  20021. -obj-y += wait.o completion.o idle.o
  20022. +obj-y += wait.o wait-simple.o work-simple.o completion.o idle.o
  20023. obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
  20024. obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  20025. obj-$(CONFIG_SCHEDSTATS) += stats.o
  20026. diff -Nur linux-4.1.39.orig/kernel/sched/rt.c linux-4.1.39/kernel/sched/rt.c
  20027. --- linux-4.1.39.orig/kernel/sched/rt.c 2017-03-13 21:04:36.000000000 +0100
  20028. +++ linux-4.1.39/kernel/sched/rt.c 2017-04-18 17:56:30.637398061 +0200
  20029. @@ -44,6 +44,7 @@
  20030. hrtimer_init(&rt_b->rt_period_timer,
  20031. CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  20032. + rt_b->rt_period_timer.irqsafe = 1;
  20033. rt_b->rt_period_timer.function = sched_rt_period_timer;
  20034. }
  20035. @@ -89,6 +90,7 @@
  20036. rt_rq->push_cpu = nr_cpu_ids;
  20037. raw_spin_lock_init(&rt_rq->push_lock);
  20038. init_irq_work(&rt_rq->push_work, push_irq_work_func);
  20039. + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ;
  20040. #endif
  20041. #endif /* CONFIG_SMP */
  20042. /* We start is dequeued state, because no RT tasks are queued */
  20043. diff -Nur linux-4.1.39.orig/kernel/sched/sched.h linux-4.1.39/kernel/sched/sched.h
  20044. --- linux-4.1.39.orig/kernel/sched/sched.h 2017-03-13 21:04:36.000000000 +0100
  20045. +++ linux-4.1.39/kernel/sched/sched.h 2017-04-18 17:56:30.637398061 +0200
  20046. @@ -1093,6 +1093,7 @@
  20047. #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */
  20048. #define WF_FORK 0x02 /* child wakeup after fork */
  20049. #define WF_MIGRATED 0x4 /* internal use, task got migrated */
  20050. +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
  20051. /*
  20052. * To aid in avoiding the subversion of "niceness" due to uneven distribution
  20053. @@ -1290,6 +1291,15 @@
  20054. extern void resched_curr(struct rq *rq);
  20055. extern void resched_cpu(int cpu);
  20056. +#ifdef CONFIG_PREEMPT_LAZY
  20057. +extern void resched_curr_lazy(struct rq *rq);
  20058. +#else
  20059. +static inline void resched_curr_lazy(struct rq *rq)
  20060. +{
  20061. + resched_curr(rq);
  20062. +}
  20063. +#endif
  20064. +
  20065. extern struct rt_bandwidth def_rt_bandwidth;
  20066. extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
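
WF_LOCK_SLEEPER, defined above, is what wake_up_lock_sleeper() passes so that try_to_wake_up() (see the kernel/sched/core.c hunks earlier) wakes only the TASK_UNINTERRUPTIBLE lock sleep and leaves p->saved_state alone. A heavily simplified, hypothetical sketch of the pattern this supports; the real implementation lives in the rtmutex-based spinlock code elsewhere in this patch, and my_lock/try_acquire() are made up:

    /* Hypothetical sleeping-spinlock slowpath; NOT the actual rtmutex code. */
    static void sleeping_lock_slowpath(struct my_lock *lock)
    {
            struct task_struct *self = current;
            unsigned long flags;

            raw_spin_lock_irqsave(&self->pi_lock, flags);
            self->saved_state = self->state;        /* remember e.g. TASK_INTERRUPTIBLE */
            __set_current_state(TASK_UNINTERRUPTIBLE);
            raw_spin_unlock_irqrestore(&self->pi_lock, flags);

            for (;;) {
                    if (try_acquire(lock))          /* hypothetical trylock */
                            break;
                    schedule();
                    set_current_state(TASK_UNINTERRUPTIBLE);
            }

            raw_spin_lock_irqsave(&self->pi_lock, flags);
            /* saved_state is TASK_RUNNING here if a regular wakeup arrived meanwhile */
            __set_current_state(self->saved_state);
            self->saved_state = TASK_RUNNING;
            raw_spin_unlock_irqrestore(&self->pi_lock, flags);
    }

The matching unlock path then wakes the waiter with wake_up_lock_sleeper() instead of wake_up_process().
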
  20067. diff -Nur linux-4.1.39.orig/kernel/sched/wait-simple.c linux-4.1.39/kernel/sched/wait-simple.c
  20068. --- linux-4.1.39.orig/kernel/sched/wait-simple.c 1970-01-01 01:00:00.000000000 +0100
  20069. +++ linux-4.1.39/kernel/sched/wait-simple.c 2017-04-18 17:56:30.637398061 +0200
  20070. @@ -0,0 +1,115 @@
  20071. +/*
  20072. + * Simple waitqueues without fancy flags and callbacks
  20073. + *
  20074. + * (C) 2011 Thomas Gleixner <tglx@linutronix.de>
  20075. + *
  20076. + * Based on kernel/wait.c
  20077. + *
  20078. + * For licencing details see kernel-base/COPYING
  20079. + */
  20080. +#include <linux/init.h>
  20081. +#include <linux/export.h>
  20082. +#include <linux/sched.h>
  20083. +#include <linux/wait-simple.h>
  20084. +
  20085. +/* Adds w to head->list. Must be called with head->lock locked. */
  20086. +static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
  20087. +{
  20088. + list_add(&w->node, &head->list);
  20089. + /* We can't let the condition leak before the setting of head */
  20090. + smp_mb();
  20091. +}
  20092. +
  20093. +/* Removes w from head->list. Must be called with head->lock locked. */
  20094. +static inline void __swait_dequeue(struct swaiter *w)
  20095. +{
  20096. + list_del_init(&w->node);
  20097. +}
  20098. +
  20099. +void __init_swait_head(struct swait_head *head, struct lock_class_key *key)
  20100. +{
  20101. + raw_spin_lock_init(&head->lock);
  20102. + lockdep_set_class(&head->lock, key);
  20103. + INIT_LIST_HEAD(&head->list);
  20104. +}
  20105. +EXPORT_SYMBOL(__init_swait_head);
  20106. +
  20107. +void swait_prepare_locked(struct swait_head *head, struct swaiter *w)
  20108. +{
  20109. + w->task = current;
  20110. + if (list_empty(&w->node))
  20111. + __swait_enqueue(head, w);
  20112. +}
  20113. +
  20114. +void swait_prepare(struct swait_head *head, struct swaiter *w, int state)
  20115. +{
  20116. + unsigned long flags;
  20117. +
  20118. + raw_spin_lock_irqsave(&head->lock, flags);
  20119. + swait_prepare_locked(head, w);
  20120. + __set_current_state(state);
  20121. + raw_spin_unlock_irqrestore(&head->lock, flags);
  20122. +}
  20123. +EXPORT_SYMBOL(swait_prepare);
  20124. +
  20125. +void swait_finish_locked(struct swait_head *head, struct swaiter *w)
  20126. +{
  20127. + __set_current_state(TASK_RUNNING);
  20128. + if (w->task)
  20129. + __swait_dequeue(w);
  20130. +}
  20131. +
  20132. +void swait_finish(struct swait_head *head, struct swaiter *w)
  20133. +{
  20134. + unsigned long flags;
  20135. +
  20136. + __set_current_state(TASK_RUNNING);
  20137. + if (w->task) {
  20138. + raw_spin_lock_irqsave(&head->lock, flags);
  20139. + __swait_dequeue(w);
  20140. + raw_spin_unlock_irqrestore(&head->lock, flags);
  20141. + }
  20142. +}
  20143. +EXPORT_SYMBOL(swait_finish);
  20144. +
  20145. +unsigned int
  20146. +__swait_wake_locked(struct swait_head *head, unsigned int state, unsigned int num)
  20147. +{
  20148. + struct swaiter *curr, *next;
  20149. + int woken = 0;
  20150. +
  20151. + list_for_each_entry_safe(curr, next, &head->list, node) {
  20152. + if (wake_up_state(curr->task, state)) {
  20153. + __swait_dequeue(curr);
  20154. + /*
  20155. + * The waiting task can free the waiter as
  20156. + * soon as curr->task = NULL is written,
  20157. + * without taking any locks. A memory barrier
  20158. + * is required here to prevent the following
  20159. + * store to curr->task from getting ahead of
  20160. + * the dequeue operation.
  20161. + */
  20162. + smp_wmb();
  20163. + curr->task = NULL;
  20164. + if (++woken == num)
  20165. + break;
  20166. + }
  20167. + }
  20168. + return woken;
  20169. +}
  20170. +
  20171. +unsigned int
  20172. +__swait_wake(struct swait_head *head, unsigned int state, unsigned int num)
  20173. +{
  20174. + unsigned long flags;
  20175. + int woken;
  20176. +
  20177. + if (!swaitqueue_active(head))
  20178. + return 0;
  20179. +
  20180. + raw_spin_lock_irqsave(&head->lock, flags);
  20181. + woken = __swait_wake_locked(head, state, num);
  20182. + raw_spin_unlock_irqrestore(&head->lock, flags);
  20183. + return woken;
  20184. +}
  20185. +EXPORT_SYMBOL(__swait_wake);
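
A minimal wait/wake sketch for the simple waitqueue above (kernel/sched/work-simple.c below is its first user here). The swaiter field names .task and .node come from the code above; my_wq, my_cond and the barrier-free condition handling are illustrative only, and the head is assumed to be set up with init_swait_head() beforehand:

    static struct swait_head my_wq;         /* init_swait_head(&my_wq) at init time */
    static bool my_cond;                    /* hypothetical condition set by the waker */

    static void wait_for_my_cond(void)
    {
            struct swaiter w = {
                    .task = NULL,
                    .node = LIST_HEAD_INIT(w.node), /* must start empty, see swait_prepare_locked() */
            };

            for (;;) {
                    swait_prepare(&my_wq, &w, TASK_UNINTERRUPTIBLE);
                    if (my_cond)
                            break;
                    schedule();
            }
            swait_finish(&my_wq, &w);
    }

    static void signal_my_cond(void)
    {
            my_cond = true;                 /* a real user needs proper ordering/locking here */
            __swait_wake(&my_wq, TASK_NORMAL, 1);
    }
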
  20186. diff -Nur linux-4.1.39.orig/kernel/sched/work-simple.c linux-4.1.39/kernel/sched/work-simple.c
  20187. --- linux-4.1.39.orig/kernel/sched/work-simple.c 1970-01-01 01:00:00.000000000 +0100
  20188. +++ linux-4.1.39/kernel/sched/work-simple.c 2017-04-18 17:56:30.637398061 +0200
  20189. @@ -0,0 +1,173 @@
  20190. +/*
  20191. + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de
  20192. + *
20193. + * Provides a PREEMPT_RT_FULL-safe framework for enqueuing callbacks
20194. + * from IRQ context. The callbacks are executed in kthread context.
  20195. + */
  20196. +
  20197. +#include <linux/wait-simple.h>
  20198. +#include <linux/work-simple.h>
  20199. +#include <linux/kthread.h>
  20200. +#include <linux/slab.h>
  20201. +#include <linux/spinlock.h>
  20202. +#include <linux/export.h>
  20203. +
  20204. +#define SWORK_EVENT_PENDING (1 << 0)
  20205. +
  20206. +static DEFINE_MUTEX(worker_mutex);
  20207. +static struct sworker *glob_worker;
  20208. +
  20209. +struct sworker {
  20210. + struct list_head events;
  20211. + struct swait_head wq;
  20212. +
  20213. + raw_spinlock_t lock;
  20214. +
  20215. + struct task_struct *task;
  20216. + int refs;
  20217. +};
  20218. +
  20219. +static bool swork_readable(struct sworker *worker)
  20220. +{
  20221. + bool r;
  20222. +
  20223. + if (kthread_should_stop())
  20224. + return true;
  20225. +
  20226. + raw_spin_lock_irq(&worker->lock);
  20227. + r = !list_empty(&worker->events);
  20228. + raw_spin_unlock_irq(&worker->lock);
  20229. +
  20230. + return r;
  20231. +}
  20232. +
  20233. +static int swork_kthread(void *arg)
  20234. +{
  20235. + struct sworker *worker = arg;
  20236. +
  20237. + for (;;) {
  20238. + swait_event_interruptible(worker->wq,
  20239. + swork_readable(worker));
  20240. + if (kthread_should_stop())
  20241. + break;
  20242. +
  20243. + raw_spin_lock_irq(&worker->lock);
  20244. + while (!list_empty(&worker->events)) {
  20245. + struct swork_event *sev;
  20246. +
  20247. + sev = list_first_entry(&worker->events,
  20248. + struct swork_event, item);
  20249. + list_del(&sev->item);
  20250. + raw_spin_unlock_irq(&worker->lock);
  20251. +
  20252. + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING,
  20253. + &sev->flags));
  20254. + sev->func(sev);
  20255. + raw_spin_lock_irq(&worker->lock);
  20256. + }
  20257. + raw_spin_unlock_irq(&worker->lock);
  20258. + }
  20259. + return 0;
  20260. +}
  20261. +
  20262. +static struct sworker *swork_create(void)
  20263. +{
  20264. + struct sworker *worker;
  20265. +
  20266. + worker = kzalloc(sizeof(*worker), GFP_KERNEL);
  20267. + if (!worker)
  20268. + return ERR_PTR(-ENOMEM);
  20269. +
  20270. + INIT_LIST_HEAD(&worker->events);
  20271. + raw_spin_lock_init(&worker->lock);
  20272. + init_swait_head(&worker->wq);
  20273. +
  20274. + worker->task = kthread_run(swork_kthread, worker, "kswork");
  20275. + if (IS_ERR(worker->task)) {
  20276. + kfree(worker);
  20277. + return ERR_PTR(-ENOMEM);
  20278. + }
  20279. +
  20280. + return worker;
  20281. +}
  20282. +
  20283. +static void swork_destroy(struct sworker *worker)
  20284. +{
  20285. + kthread_stop(worker->task);
  20286. +
  20287. + WARN_ON(!list_empty(&worker->events));
  20288. + kfree(worker);
  20289. +}
  20290. +
  20291. +/**
  20292. + * swork_queue - queue swork
  20293. + *
20294. + * Returns %false if @sev was already on a queue, %true otherwise.
  20295. + *
  20296. + * The work is queued and processed on a random CPU
  20297. + */
  20298. +bool swork_queue(struct swork_event *sev)
  20299. +{
  20300. + unsigned long flags;
  20301. +
  20302. + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags))
  20303. + return false;
  20304. +
  20305. + raw_spin_lock_irqsave(&glob_worker->lock, flags);
  20306. + list_add_tail(&sev->item, &glob_worker->events);
  20307. + raw_spin_unlock_irqrestore(&glob_worker->lock, flags);
  20308. +
  20309. + swait_wake(&glob_worker->wq);
  20310. + return true;
  20311. +}
  20312. +EXPORT_SYMBOL_GPL(swork_queue);
  20313. +
  20314. +/**
  20315. + * swork_get - get an instance of the sworker
  20316. + *
  20317. + * Returns an negative error code if the initialization if the worker did not
  20318. + * work, %0 otherwise.
  20319. + *
  20320. + */
  20321. +int swork_get(void)
  20322. +{
  20323. + struct sworker *worker;
  20324. +
  20325. + mutex_lock(&worker_mutex);
  20326. + if (!glob_worker) {
  20327. + worker = swork_create();
  20328. + if (IS_ERR(worker)) {
  20329. + mutex_unlock(&worker_mutex);
  20330. + return -ENOMEM;
  20331. + }
  20332. +
  20333. + glob_worker = worker;
  20334. + }
  20335. +
  20336. + glob_worker->refs++;
  20337. + mutex_unlock(&worker_mutex);
  20338. +
  20339. + return 0;
  20340. +}
  20341. +EXPORT_SYMBOL_GPL(swork_get);
  20342. +
  20343. +/**
  20344. + * swork_put - puts an instance of the sworker
  20345. + *
20346. + * Drops a reference; the kswork thread is destroyed when the last reference
20347. + * is put. This must not be called until all queued events have completed.
  20348. + */
  20349. +void swork_put(void)
  20350. +{
  20351. + mutex_lock(&worker_mutex);
  20352. +
  20353. + glob_worker->refs--;
  20354. + if (glob_worker->refs > 0)
  20355. + goto out;
  20356. +
  20357. + swork_destroy(glob_worker);
  20358. + glob_worker = NULL;
  20359. +out:
  20360. + mutex_unlock(&worker_mutex);
  20361. +}
  20362. +EXPORT_SYMBOL_GPL(swork_put);
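
A minimal consumer sketch for the swork API above: queue an event from hard-IRQ context and let its callback run in the kswork kthread, where sleeping is allowed. The swork_event field names (item, flags, func) are taken from the code above; my_irq_event, my_event_fn and the hand-rolled initialization are illustrative (a header-provided init macro, if any, is not assumed here):

    static void my_event_fn(struct swork_event *sev)
    {
            /* Runs in the kswork kthread: sleeping is allowed here. */
            pr_info("deferred work ran in kthread context\n");
    }

    static struct swork_event my_irq_event;

    static int my_setup(void)
    {
            int ret;

            ret = swork_get();              /* ensure the kswork thread exists */
            if (ret)
                    return ret;

            INIT_LIST_HEAD(&my_irq_event.item);
            my_irq_event.flags = 0;
            my_irq_event.func = my_event_fn;
            return 0;
    }

    static irqreturn_t my_irq_handler(int irq, void *dev_id)
    {
            swork_queue(&my_irq_event);     /* safe from IRQ context, PREEMPT_RT_FULL included */
            return IRQ_HANDLED;
    }

    static void my_teardown(void)
    {
            swork_put();                    /* only after all queued events have completed */
    }
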
  20363. diff -Nur linux-4.1.39.orig/kernel/signal.c linux-4.1.39/kernel/signal.c
  20364. --- linux-4.1.39.orig/kernel/signal.c 2017-03-13 21:04:36.000000000 +0100
  20365. +++ linux-4.1.39/kernel/signal.c 2017-04-18 17:56:30.637398061 +0200
  20366. @@ -14,6 +14,7 @@
  20367. #include <linux/export.h>
  20368. #include <linux/init.h>
  20369. #include <linux/sched.h>
  20370. +#include <linux/sched/rt.h>
  20371. #include <linux/fs.h>
  20372. #include <linux/tty.h>
  20373. #include <linux/binfmts.h>
  20374. @@ -352,13 +353,45 @@
  20375. return false;
  20376. }
  20377. +#ifdef __HAVE_ARCH_CMPXCHG
  20378. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  20379. +{
  20380. + struct sigqueue *q = t->sigqueue_cache;
  20381. +
  20382. + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
  20383. + return NULL;
  20384. + return q;
  20385. +}
  20386. +
  20387. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  20388. +{
  20389. + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
  20390. + return 0;
  20391. + return 1;
  20392. +}
  20393. +
  20394. +#else
  20395. +
  20396. +static inline struct sigqueue *get_task_cache(struct task_struct *t)
  20397. +{
  20398. + return NULL;
  20399. +}
  20400. +
  20401. +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
  20402. +{
  20403. + return 1;
  20404. +}
  20405. +
  20406. +#endif
  20407. +
  20408. /*
  20409. * allocate a new signal queue record
  20410. * - this may be called without locks if and only if t == current, otherwise an
  20411. * appropriate lock must be held to stop the target task from exiting
  20412. */
  20413. static struct sigqueue *
  20414. -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
  20415. +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
  20416. + int override_rlimit, int fromslab)
  20417. {
  20418. struct sigqueue *q = NULL;
  20419. struct user_struct *user;
  20420. @@ -375,7 +408,10 @@
  20421. if (override_rlimit ||
  20422. atomic_read(&user->sigpending) <=
  20423. task_rlimit(t, RLIMIT_SIGPENDING)) {
  20424. - q = kmem_cache_alloc(sigqueue_cachep, flags);
  20425. + if (!fromslab)
  20426. + q = get_task_cache(t);
  20427. + if (!q)
  20428. + q = kmem_cache_alloc(sigqueue_cachep, flags);
  20429. } else {
  20430. print_dropped_signal(sig);
  20431. }
  20432. @@ -392,6 +428,13 @@
  20433. return q;
  20434. }
  20435. +static struct sigqueue *
  20436. +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
  20437. + int override_rlimit)
  20438. +{
  20439. + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
  20440. +}
  20441. +
  20442. static void __sigqueue_free(struct sigqueue *q)
  20443. {
  20444. if (q->flags & SIGQUEUE_PREALLOC)
  20445. @@ -401,6 +444,21 @@
  20446. kmem_cache_free(sigqueue_cachep, q);
  20447. }
  20448. +static void sigqueue_free_current(struct sigqueue *q)
  20449. +{
  20450. + struct user_struct *up;
  20451. +
  20452. + if (q->flags & SIGQUEUE_PREALLOC)
  20453. + return;
  20454. +
  20455. + up = q->user;
  20456. + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
  20457. + atomic_dec(&up->sigpending);
  20458. + free_uid(up);
  20459. + } else
  20460. + __sigqueue_free(q);
  20461. +}
  20462. +
  20463. void flush_sigqueue(struct sigpending *queue)
  20464. {
  20465. struct sigqueue *q;
  20466. @@ -414,6 +472,21 @@
  20467. }
  20468. /*
  20469. + * Called from __exit_signal. Flush tsk->pending and
  20470. + * tsk->sigqueue_cache
  20471. + */
  20472. +void flush_task_sigqueue(struct task_struct *tsk)
  20473. +{
  20474. + struct sigqueue *q;
  20475. +
  20476. + flush_sigqueue(&tsk->pending);
  20477. +
  20478. + q = get_task_cache(tsk);
  20479. + if (q)
  20480. + kmem_cache_free(sigqueue_cachep, q);
  20481. +}
  20482. +
  20483. +/*
  20484. * Flush all pending signals for a task.
  20485. */
  20486. void __flush_signals(struct task_struct *t)
  20487. @@ -565,7 +638,7 @@
  20488. still_pending:
  20489. list_del_init(&first->list);
  20490. copy_siginfo(info, &first->info);
  20491. - __sigqueue_free(first);
  20492. + sigqueue_free_current(first);
  20493. } else {
  20494. /*
  20495. * Ok, it wasn't in the queue. This must be
  20496. @@ -611,6 +684,8 @@
  20497. {
  20498. int signr;
  20499. + WARN_ON_ONCE(tsk != current);
  20500. +
  20501. /* We only dequeue private signals from ourselves, we don't let
  20502. * signalfd steal them
  20503. */
  20504. @@ -1207,8 +1282,8 @@
  20505. * We don't want to have recursive SIGSEGV's etc, for example,
  20506. * that is why we also clear SIGNAL_UNKILLABLE.
  20507. */
  20508. -int
  20509. -force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  20510. +static int
  20511. +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  20512. {
  20513. unsigned long int flags;
  20514. int ret, blocked, ignored;
  20515. @@ -1233,6 +1308,39 @@
  20516. return ret;
  20517. }
  20518. +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  20519. +{
  20520. +/*
  20521. + * On some archs, PREEMPT_RT has to delay sending a signal from a trap
20522. + * since it cannot enable preemption, and the signal code's spin_locks
  20523. + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
  20524. + * send the signal on exit of the trap.
  20525. + */
  20526. +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
  20527. + if (in_atomic()) {
  20528. + if (WARN_ON_ONCE(t != current))
  20529. + return 0;
  20530. + if (WARN_ON_ONCE(t->forced_info.si_signo))
  20531. + return 0;
  20532. +
  20533. + if (is_si_special(info)) {
  20534. + WARN_ON_ONCE(info != SEND_SIG_PRIV);
  20535. + t->forced_info.si_signo = sig;
  20536. + t->forced_info.si_errno = 0;
  20537. + t->forced_info.si_code = SI_KERNEL;
  20538. + t->forced_info.si_pid = 0;
  20539. + t->forced_info.si_uid = 0;
  20540. + } else {
  20541. + t->forced_info = *info;
  20542. + }
  20543. +
  20544. + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
  20545. + return 0;
  20546. + }
  20547. +#endif
  20548. + return do_force_sig_info(sig, info, t);
  20549. +}
  20550. +
  20551. /*
  20552. * Nuke all other threads in the group.
  20553. */
  20554. @@ -1267,12 +1375,12 @@
  20555. * Disable interrupts early to avoid deadlocks.
  20556. * See rcu_read_unlock() comment header for details.
  20557. */
  20558. - local_irq_save(*flags);
  20559. + local_irq_save_nort(*flags);
  20560. rcu_read_lock();
  20561. sighand = rcu_dereference(tsk->sighand);
  20562. if (unlikely(sighand == NULL)) {
  20563. rcu_read_unlock();
  20564. - local_irq_restore(*flags);
  20565. + local_irq_restore_nort(*flags);
  20566. break;
  20567. }
  20568. /*
  20569. @@ -1293,7 +1401,7 @@
  20570. }
  20571. spin_unlock(&sighand->siglock);
  20572. rcu_read_unlock();
  20573. - local_irq_restore(*flags);
  20574. + local_irq_restore_nort(*flags);
  20575. }
  20576. return sighand;
  20577. @@ -1536,7 +1644,8 @@
  20578. */
  20579. struct sigqueue *sigqueue_alloc(void)
  20580. {
  20581. - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
  20582. + /* Preallocated sigqueue objects always from the slabcache ! */
  20583. + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
  20584. if (q)
  20585. q->flags |= SIGQUEUE_PREALLOC;
  20586. @@ -1897,15 +2006,7 @@
  20587. if (gstop_done && ptrace_reparented(current))
  20588. do_notify_parent_cldstop(current, false, why);
  20589. - /*
  20590. - * Don't want to allow preemption here, because
  20591. - * sys_ptrace() needs this task to be inactive.
  20592. - *
  20593. - * XXX: implement read_unlock_no_resched().
  20594. - */
  20595. - preempt_disable();
  20596. read_unlock(&tasklist_lock);
  20597. - preempt_enable_no_resched();
  20598. freezable_schedule();
  20599. } else {
  20600. /*
  20601. diff -Nur linux-4.1.39.orig/kernel/softirq.c linux-4.1.39/kernel/softirq.c
  20602. --- linux-4.1.39.orig/kernel/softirq.c 2017-03-13 21:04:36.000000000 +0100
  20603. +++ linux-4.1.39/kernel/softirq.c 2017-04-18 17:56:30.637398061 +0200
  20604. @@ -21,10 +21,12 @@
  20605. #include <linux/freezer.h>
  20606. #include <linux/kthread.h>
  20607. #include <linux/rcupdate.h>
  20608. +#include <linux/delay.h>
  20609. #include <linux/ftrace.h>
  20610. #include <linux/smp.h>
  20611. #include <linux/smpboot.h>
  20612. #include <linux/tick.h>
  20613. +#include <linux/locallock.h>
  20614. #include <linux/irq.h>
  20615. #define CREATE_TRACE_POINTS
  20616. @@ -56,12 +58,108 @@
  20617. static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  20618. DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  20619. +#ifdef CONFIG_PREEMPT_RT_FULL
  20620. +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
  20621. +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
  20622. +#endif
  20623. const char * const softirq_to_name[NR_SOFTIRQS] = {
  20624. "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
  20625. "TASKLET", "SCHED", "HRTIMER", "RCU"
  20626. };
  20627. +#ifdef CONFIG_NO_HZ_COMMON
  20628. +# ifdef CONFIG_PREEMPT_RT_FULL
  20629. +
  20630. +struct softirq_runner {
  20631. + struct task_struct *runner[NR_SOFTIRQS];
  20632. +};
  20633. +
  20634. +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners);
  20635. +
  20636. +static inline void softirq_set_runner(unsigned int sirq)
  20637. +{
  20638. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  20639. +
  20640. + sr->runner[sirq] = current;
  20641. +}
  20642. +
  20643. +static inline void softirq_clr_runner(unsigned int sirq)
  20644. +{
  20645. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  20646. +
  20647. + sr->runner[sirq] = NULL;
  20648. +}
  20649. +
  20650. +/*
  20651. + * On preempt-rt a softirq running context might be blocked on a
  20652. + * lock. There might be no other runnable task on this CPU because the
  20653. + * lock owner runs on some other CPU. So we have to go into idle with
20654. + * the pending bit set. Therefore we need to check this, otherwise we
20655. + * warn about false positives, which confuses users and defeats the
  20656. + * whole purpose of this test.
  20657. + *
  20658. + * This code is called with interrupts disabled.
  20659. + */
  20660. +void softirq_check_pending_idle(void)
  20661. +{
  20662. + static int rate_limit;
  20663. + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
  20664. + u32 warnpending;
  20665. + int i;
  20666. +
  20667. + if (rate_limit >= 10)
  20668. + return;
  20669. +
  20670. + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
  20671. + for (i = 0; i < NR_SOFTIRQS; i++) {
  20672. + struct task_struct *tsk = sr->runner[i];
  20673. +
  20674. + /*
  20675. + * The wakeup code in rtmutex.c wakes up the task
  20676. + * _before_ it sets pi_blocked_on to NULL under
  20677. + * tsk->pi_lock. So we need to check for both: state
  20678. + * and pi_blocked_on.
  20679. + */
  20680. + if (tsk) {
  20681. + raw_spin_lock(&tsk->pi_lock);
  20682. + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
  20683. + /* Clear all bits pending in that task */
  20684. + warnpending &= ~(tsk->softirqs_raised);
  20685. + warnpending &= ~(1 << i);
  20686. + }
  20687. + raw_spin_unlock(&tsk->pi_lock);
  20688. + }
  20689. + }
  20690. +
  20691. + if (warnpending) {
  20692. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  20693. + warnpending);
  20694. + rate_limit++;
  20695. + }
  20696. +}
  20697. +# else
  20698. +/*
  20699. + * On !PREEMPT_RT we just printk rate limited:
  20700. + */
  20701. +void softirq_check_pending_idle(void)
  20702. +{
  20703. + static int rate_limit;
  20704. +
  20705. + if (rate_limit < 10 &&
  20706. + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  20707. + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
  20708. + local_softirq_pending());
  20709. + rate_limit++;
  20710. + }
  20711. +}
  20712. +# endif
  20713. +
  20714. +#else /* !CONFIG_NO_HZ_COMMON */
  20715. +static inline void softirq_set_runner(unsigned int sirq) { }
  20716. +static inline void softirq_clr_runner(unsigned int sirq) { }
  20717. +#endif
  20718. +
  20719. /*
  20720. * we cannot loop indefinitely here to avoid userspace starvation,
  20721. * but we also don't want to introduce a worst case 1/HZ latency
  20722. @@ -77,6 +175,79 @@
  20723. wake_up_process(tsk);
  20724. }
  20725. +#ifdef CONFIG_PREEMPT_RT_FULL
  20726. +static void wakeup_timer_softirqd(void)
  20727. +{
  20728. + /* Interrupts are disabled: no need to stop preemption */
  20729. + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
  20730. +
  20731. + if (tsk && tsk->state != TASK_RUNNING)
  20732. + wake_up_process(tsk);
  20733. +}
  20734. +#endif
  20735. +
  20736. +static void handle_softirq(unsigned int vec_nr)
  20737. +{
  20738. + struct softirq_action *h = softirq_vec + vec_nr;
  20739. + int prev_count;
  20740. +
  20741. + prev_count = preempt_count();
  20742. +
  20743. + kstat_incr_softirqs_this_cpu(vec_nr);
  20744. +
  20745. + trace_softirq_entry(vec_nr);
  20746. + h->action(h);
  20747. + trace_softirq_exit(vec_nr);
  20748. + if (unlikely(prev_count != preempt_count())) {
  20749. + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  20750. + vec_nr, softirq_to_name[vec_nr], h->action,
  20751. + prev_count, preempt_count());
  20752. + preempt_count_set(prev_count);
  20753. + }
  20754. +}
  20755. +
  20756. +#ifndef CONFIG_PREEMPT_RT_FULL
  20757. +static inline int ksoftirqd_softirq_pending(void)
  20758. +{
  20759. + return local_softirq_pending();
  20760. +}
  20761. +
  20762. +static void handle_pending_softirqs(u32 pending)
  20763. +{
  20764. + struct softirq_action *h = softirq_vec;
  20765. + int softirq_bit;
  20766. +
  20767. + local_irq_enable();
  20768. +
  20769. + h = softirq_vec;
  20770. +
  20771. + while ((softirq_bit = ffs(pending))) {
  20772. + unsigned int vec_nr;
  20773. +
  20774. + h += softirq_bit - 1;
  20775. + vec_nr = h - softirq_vec;
  20776. + handle_softirq(vec_nr);
  20777. +
  20778. + h++;
  20779. + pending >>= softirq_bit;
  20780. + }
  20781. +
  20782. + rcu_bh_qs();
  20783. + local_irq_disable();
  20784. +}
  20785. +
  20786. +static void run_ksoftirqd(unsigned int cpu)
  20787. +{
  20788. + local_irq_disable();
  20789. + if (ksoftirqd_softirq_pending()) {
  20790. + __do_softirq();
  20791. + local_irq_enable();
  20792. + cond_resched_rcu_qs();
  20793. + return;
  20794. + }
  20795. + local_irq_enable();
  20796. +}
  20797. +
  20798. /*
  20799. * preempt_count and SOFTIRQ_OFFSET usage:
  20800. * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  20801. @@ -116,9 +287,9 @@
  20802. if (preempt_count() == cnt) {
  20803. #ifdef CONFIG_DEBUG_PREEMPT
  20804. - current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
  20805. + current->preempt_disable_ip = get_lock_parent_ip();
  20806. #endif
  20807. - trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
  20808. + trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
  20809. }
  20810. }
  20811. EXPORT_SYMBOL(__local_bh_disable_ip);
  20812. @@ -232,10 +403,8 @@
  20813. unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  20814. unsigned long old_flags = current->flags;
  20815. int max_restart = MAX_SOFTIRQ_RESTART;
  20816. - struct softirq_action *h;
  20817. bool in_hardirq;
  20818. __u32 pending;
  20819. - int softirq_bit;
  20820. /*
  20821. * Mask out PF_MEMALLOC s current task context is borrowed for the
  20822. @@ -254,36 +423,7 @@
  20823. /* Reset the pending bitmask before enabling irqs */
  20824. set_softirq_pending(0);
  20825. - local_irq_enable();
  20826. -
  20827. - h = softirq_vec;
  20828. -
  20829. - while ((softirq_bit = ffs(pending))) {
  20830. - unsigned int vec_nr;
  20831. - int prev_count;
  20832. -
  20833. - h += softirq_bit - 1;
  20834. -
  20835. - vec_nr = h - softirq_vec;
  20836. - prev_count = preempt_count();
  20837. -
  20838. - kstat_incr_softirqs_this_cpu(vec_nr);
  20839. -
  20840. - trace_softirq_entry(vec_nr);
  20841. - h->action(h);
  20842. - trace_softirq_exit(vec_nr);
  20843. - if (unlikely(prev_count != preempt_count())) {
  20844. - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
  20845. - vec_nr, softirq_to_name[vec_nr], h->action,
  20846. - prev_count, preempt_count());
  20847. - preempt_count_set(prev_count);
  20848. - }
  20849. - h++;
  20850. - pending >>= softirq_bit;
  20851. - }
  20852. -
  20853. - rcu_bh_qs();
  20854. - local_irq_disable();
  20855. + handle_pending_softirqs(pending);
  20856. pending = local_softirq_pending();
  20857. if (pending) {
  20858. @@ -320,6 +460,310 @@
  20859. }
  20860. /*
  20861. + * This function must run with irqs disabled!
  20862. + */
  20863. +void raise_softirq_irqoff(unsigned int nr)
  20864. +{
  20865. + __raise_softirq_irqoff(nr);
  20866. +
  20867. + /*
  20868. + * If we're in an interrupt or softirq, we're done
  20869. + * (this also catches softirq-disabled code). We will
  20870. + * actually run the softirq once we return from
  20871. + * the irq or softirq.
  20872. + *
  20873. + * Otherwise we wake up ksoftirqd to make sure we
  20874. + * schedule the softirq soon.
  20875. + */
  20876. + if (!in_interrupt())
  20877. + wakeup_softirqd();
  20878. +}
  20879. +
  20880. +void __raise_softirq_irqoff(unsigned int nr)
  20881. +{
  20882. + trace_softirq_raise(nr);
  20883. + or_softirq_pending(1UL << nr);
  20884. +}
  20885. +
  20886. +static inline void local_bh_disable_nort(void) { local_bh_disable(); }
  20887. +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
  20888. +static void ksoftirqd_set_sched_params(unsigned int cpu) { }
  20889. +
  20890. +#else /* !PREEMPT_RT_FULL */
  20891. +
  20892. +/*
  20893. + * On RT we serialize softirq execution with a cpu local lock per softirq
  20894. + */
  20895. +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks);
  20896. +
  20897. +void __init softirq_early_init(void)
  20898. +{
  20899. + int i;
  20900. +
  20901. + for (i = 0; i < NR_SOFTIRQS; i++)
  20902. + local_irq_lock_init(local_softirq_locks[i]);
  20903. +}
  20904. +
  20905. +static void lock_softirq(int which)
  20906. +{
  20907. + local_lock(local_softirq_locks[which]);
  20908. +}
  20909. +
  20910. +static void unlock_softirq(int which)
  20911. +{
  20912. + local_unlock(local_softirq_locks[which]);
  20913. +}
  20914. +
  20915. +static void do_single_softirq(int which)
  20916. +{
  20917. + unsigned long old_flags = current->flags;
  20918. +
  20919. + current->flags &= ~PF_MEMALLOC;
  20920. + vtime_account_irq_enter(current);
  20921. + current->flags |= PF_IN_SOFTIRQ;
  20922. + lockdep_softirq_enter();
  20923. + local_irq_enable();
  20924. + handle_softirq(which);
  20925. + local_irq_disable();
  20926. + lockdep_softirq_exit();
  20927. + current->flags &= ~PF_IN_SOFTIRQ;
  20928. + vtime_account_irq_enter(current);
  20929. + tsk_restore_flags(current, old_flags, PF_MEMALLOC);
  20930. +}
  20931. +
  20932. +/*
  20933. + * Called with interrupts disabled. Process softirqs which were raised
  20934. + * in current context (or on behalf of ksoftirqd).
  20935. + */
  20936. +static void do_current_softirqs(void)
  20937. +{
  20938. + while (current->softirqs_raised) {
  20939. + int i = __ffs(current->softirqs_raised);
  20940. + unsigned int pending, mask = (1U << i);
  20941. +
  20942. + current->softirqs_raised &= ~mask;
  20943. + local_irq_enable();
  20944. +
  20945. + /*
  20946. + * If the lock is contended, we boost the owner to
  20947. + * process the softirq or leave the critical section
  20948. + * now.
  20949. + */
  20950. + lock_softirq(i);
  20951. + local_irq_disable();
  20952. + softirq_set_runner(i);
  20953. + /*
20954. + * Check the local_softirq_pending() bits to see whether
20955. + * we still need to process this or if someone else
20956. + * already took care of it.
  20957. + */
  20958. + pending = local_softirq_pending();
  20959. + if (pending & mask) {
  20960. + set_softirq_pending(pending & ~mask);
  20961. + do_single_softirq(i);
  20962. + }
  20963. + softirq_clr_runner(i);
  20964. + WARN_ON(current->softirq_nestcnt != 1);
  20965. + local_irq_enable();
  20966. + unlock_softirq(i);
  20967. + local_irq_disable();
  20968. + }
  20969. +}
  20970. +
  20971. +void __local_bh_disable(void)
  20972. +{
  20973. + if (++current->softirq_nestcnt == 1)
  20974. + migrate_disable();
  20975. +}
  20976. +EXPORT_SYMBOL(__local_bh_disable);
  20977. +
  20978. +void __local_bh_enable(void)
  20979. +{
  20980. + if (WARN_ON(current->softirq_nestcnt == 0))
  20981. + return;
  20982. +
  20983. + local_irq_disable();
  20984. + if (current->softirq_nestcnt == 1 && current->softirqs_raised)
  20985. + do_current_softirqs();
  20986. + local_irq_enable();
  20987. +
  20988. + if (--current->softirq_nestcnt == 0)
  20989. + migrate_enable();
  20990. +}
  20991. +EXPORT_SYMBOL(__local_bh_enable);
  20992. +
  20993. +void _local_bh_enable(void)
  20994. +{
  20995. + if (WARN_ON(current->softirq_nestcnt == 0))
  20996. + return;
  20997. + if (--current->softirq_nestcnt == 0)
  20998. + migrate_enable();
  20999. +}
  21000. +EXPORT_SYMBOL(_local_bh_enable);
  21001. +
  21002. +int in_serving_softirq(void)
  21003. +{
  21004. + return current->flags & PF_IN_SOFTIRQ;
  21005. +}
  21006. +EXPORT_SYMBOL(in_serving_softirq);
  21007. +
  21008. +/* Called with preemption disabled */
  21009. +static void run_ksoftirqd(unsigned int cpu)
  21010. +{
  21011. + local_irq_disable();
  21012. + current->softirq_nestcnt++;
  21013. +
  21014. + do_current_softirqs();
  21015. + current->softirq_nestcnt--;
  21016. + local_irq_enable();
  21017. + cond_resched_rcu_qs();
  21018. +}
  21019. +
  21020. +/*
  21021. + * Called from netif_rx_ni(). Preemption enabled, but migration
  21022. + * disabled. So the cpu can't go away under us.
  21023. + */
  21024. +void thread_do_softirq(void)
  21025. +{
  21026. + if (!in_serving_softirq() && current->softirqs_raised) {
  21027. + current->softirq_nestcnt++;
  21028. + do_current_softirqs();
  21029. + current->softirq_nestcnt--;
  21030. + }
  21031. +}
  21032. +
  21033. +static void do_raise_softirq_irqoff(unsigned int nr)
  21034. +{
  21035. + unsigned int mask;
  21036. +
  21037. + mask = 1UL << nr;
  21038. +
  21039. + trace_softirq_raise(nr);
  21040. + or_softirq_pending(mask);
  21041. +
  21042. + /*
  21043. + * If we are not in a hard interrupt and inside a bh disabled
  21044. + * region, we simply raise the flag on current. local_bh_enable()
  21045. + * will make sure that the softirq is executed. Otherwise we
  21046. + * delegate it to ksoftirqd.
  21047. + */
  21048. + if (!in_irq() && current->softirq_nestcnt)
  21049. + current->softirqs_raised |= mask;
  21050. + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
  21051. + return;
  21052. +
  21053. + if (mask & TIMER_SOFTIRQS)
  21054. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  21055. + else
  21056. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  21057. +}
  21058. +
  21059. +static void wakeup_proper_softirq(unsigned int nr)
  21060. +{
  21061. + if ((1UL << nr) & TIMER_SOFTIRQS)
  21062. + wakeup_timer_softirqd();
  21063. + else
  21064. + wakeup_softirqd();
  21065. +}
  21066. +
  21067. +
  21068. +void __raise_softirq_irqoff(unsigned int nr)
  21069. +{
  21070. + do_raise_softirq_irqoff(nr);
  21071. + if (!in_irq() && !current->softirq_nestcnt)
  21072. + wakeup_proper_softirq(nr);
  21073. +}
  21074. +
  21075. +/*
21076. + * Same as __raise_softirq_irqoff() but will process it in ksoftirqd
  21077. + */
  21078. +void __raise_softirq_irqoff_ksoft(unsigned int nr)
  21079. +{
  21080. + unsigned int mask;
  21081. +
  21082. + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) ||
  21083. + !__this_cpu_read(ktimer_softirqd)))
  21084. + return;
  21085. + mask = 1UL << nr;
  21086. +
  21087. + trace_softirq_raise(nr);
  21088. + or_softirq_pending(mask);
  21089. + if (mask & TIMER_SOFTIRQS)
  21090. + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
  21091. + else
  21092. + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
  21093. + wakeup_proper_softirq(nr);
  21094. +}
  21095. +
  21096. +/*
  21097. + * This function must run with irqs disabled!
  21098. + */
  21099. +void raise_softirq_irqoff(unsigned int nr)
  21100. +{
  21101. + do_raise_softirq_irqoff(nr);
  21102. +
  21103. + /*
21104. + * If we're in a hard interrupt we let the irq return code deal
  21105. + * with the wakeup of ksoftirqd.
  21106. + */
  21107. + if (in_irq())
  21108. + return;
  21109. + /*
  21110. + * If we are in thread context but outside of a bh disabled
  21111. + * region, we need to wake ksoftirqd as well.
  21112. + *
  21113. + * CHECKME: Some of the places which do that could be wrapped
  21114. + * into local_bh_disable/enable pairs. Though it's unclear
  21115. + * whether this is worth the effort. To find those places just
  21116. + * raise a WARN() if the condition is met.
  21117. + */
  21118. + if (!current->softirq_nestcnt)
  21119. + wakeup_proper_softirq(nr);
  21120. +}
  21121. +
  21122. +static inline int ksoftirqd_softirq_pending(void)
  21123. +{
  21124. + return current->softirqs_raised;
  21125. +}
  21126. +
  21127. +static inline void local_bh_disable_nort(void) { }
  21128. +static inline void _local_bh_enable_nort(void) { }
  21129. +
  21130. +static inline void ksoftirqd_set_sched_params(unsigned int cpu)
  21131. +{
  21132. + /* Take over all but timer pending softirqs when starting */
  21133. + local_irq_disable();
  21134. + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
  21135. + local_irq_enable();
  21136. +}
  21137. +
  21138. +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
  21139. +{
  21140. + struct sched_param param = { .sched_priority = 1 };
  21141. +
  21142. + sched_setscheduler(current, SCHED_FIFO, &param);
  21143. +
  21144. + /* Take over timer pending softirqs when starting */
  21145. + local_irq_disable();
  21146. + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
  21147. + local_irq_enable();
  21148. +}
  21149. +
  21150. +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
  21151. + bool online)
  21152. +{
  21153. + struct sched_param param = { .sched_priority = 0 };
  21154. +
  21155. + sched_setscheduler(current, SCHED_NORMAL, &param);
  21156. +}
  21157. +
  21158. +static int ktimer_softirqd_should_run(unsigned int cpu)
  21159. +{
  21160. + return current->softirqs_raised;
  21161. +}
  21162. +
  21163. +#endif /* PREEMPT_RT_FULL */
  21164. +/*
  21165. * Enter an interrupt context.
  21166. */
  21167. void irq_enter(void)
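
The hunk above adds the RT variants of the bottom-half machinery: a per-task softirq_nestcnt plus a per-task softirqs_raised mask, so softirqs raised inside a bh-disabled section are recorded on current and only executed when the outermost __local_bh_enable() is reached. The following is a minimal userspace sketch of just that bookkeeping, assuming a single thread and plain counters; the model_* names are hypothetical and not part of the kernel API, and the real code additionally takes the per-softirq local lock and disables migration.

#include <stdio.h>

static unsigned int softirq_nestcnt;      /* models current->softirq_nestcnt */
static unsigned int softirqs_raised;      /* models current->softirqs_raised */

static void model_do_current_softirqs(void)
{
        while (softirqs_raised) {
                int i = __builtin_ctz(softirqs_raised);   /* lowest pending vector */

                softirqs_raised &= ~(1u << i);
                printf("running softirq vector %d\n", i);
        }
}

static void model_local_bh_disable(void)
{
        softirq_nestcnt++;
}

static void model_raise_softirq(unsigned int nr)
{
        /* inside a bh-disabled section: just record it on "current" */
        softirqs_raised |= 1u << nr;
}

static void model_local_bh_enable(void)
{
        /* run the deferred work before the outermost level drops */
        if (softirq_nestcnt == 1 && softirqs_raised)
                model_do_current_softirqs();
        softirq_nestcnt--;
}

int main(void)
{
        model_local_bh_disable();
        model_raise_softirq(3);           /* e.g. a NET_RX-like vector */
        model_raise_softirq(6);
        model_local_bh_enable();          /* both vectors run here */
        return 0;
}
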
  21168. @@ -330,9 +774,9 @@
  21169. * Prevent raise_softirq from needlessly waking up ksoftirqd
  21170. * here, as softirq will be serviced on return from interrupt.
  21171. */
  21172. - local_bh_disable();
  21173. + local_bh_disable_nort();
  21174. tick_irq_enter();
  21175. - _local_bh_enable();
  21176. + _local_bh_enable_nort();
  21177. }
  21178. __irq_enter();
  21179. @@ -340,6 +784,7 @@
  21180. static inline void invoke_softirq(void)
  21181. {
  21182. +#ifndef CONFIG_PREEMPT_RT_FULL
  21183. if (!force_irqthreads) {
  21184. #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
  21185. /*
  21186. @@ -359,6 +804,18 @@
  21187. } else {
  21188. wakeup_softirqd();
  21189. }
  21190. +#else /* PREEMPT_RT_FULL */
  21191. + unsigned long flags;
  21192. +
  21193. + local_irq_save(flags);
  21194. + if (__this_cpu_read(ksoftirqd) &&
  21195. + __this_cpu_read(ksoftirqd)->softirqs_raised)
  21196. + wakeup_softirqd();
  21197. + if (__this_cpu_read(ktimer_softirqd) &&
  21198. + __this_cpu_read(ktimer_softirqd)->softirqs_raised)
  21199. + wakeup_timer_softirqd();
  21200. + local_irq_restore(flags);
  21201. +#endif
  21202. }
  21203. static inline void tick_irq_exit(void)
  21204. @@ -395,26 +852,6 @@
  21205. trace_hardirq_exit(); /* must be last! */
  21206. }
  21207. -/*
  21208. - * This function must run with irqs disabled!
  21209. - */
  21210. -inline void raise_softirq_irqoff(unsigned int nr)
  21211. -{
  21212. - __raise_softirq_irqoff(nr);
  21213. -
  21214. - /*
  21215. - * If we're in an interrupt or softirq, we're done
  21216. - * (this also catches softirq-disabled code). We will
  21217. - * actually run the softirq once we return from
  21218. - * the irq or softirq.
  21219. - *
  21220. - * Otherwise we wake up ksoftirqd to make sure we
  21221. - * schedule the softirq soon.
  21222. - */
  21223. - if (!in_interrupt())
  21224. - wakeup_softirqd();
  21225. -}
  21226. -
  21227. void raise_softirq(unsigned int nr)
  21228. {
  21229. unsigned long flags;
  21230. @@ -424,12 +861,6 @@
  21231. local_irq_restore(flags);
  21232. }
  21233. -void __raise_softirq_irqoff(unsigned int nr)
  21234. -{
  21235. - trace_softirq_raise(nr);
  21236. - or_softirq_pending(1UL << nr);
  21237. -}
  21238. -
  21239. void open_softirq(int nr, void (*action)(struct softirq_action *))
  21240. {
  21241. softirq_vec[nr].action = action;
  21242. @@ -446,15 +877,45 @@
  21243. static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
  21244. static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
  21245. +static void inline
  21246. +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr)
  21247. +{
  21248. + if (tasklet_trylock(t)) {
  21249. +again:
  21250. + /* We may have been preempted before tasklet_trylock
  21251. + * and __tasklet_action may have already run.
20752. + * So double check the sched bit while the tasklet
  21253. + * is locked before adding it to the list.
  21254. + */
  21255. + if (test_bit(TASKLET_STATE_SCHED, &t->state)) {
  21256. + t->next = NULL;
  21257. + *head->tail = t;
  21258. + head->tail = &(t->next);
  21259. + raise_softirq_irqoff(nr);
  21260. + tasklet_unlock(t);
  21261. + } else {
21262. + /* This is subtle. If we hit the corner case above,
21263. + * it is possible that we get preempted right here,
  21264. + * and another task has successfully called
  21265. + * tasklet_schedule(), then this function, and
  21266. + * failed on the trylock. Thus we must be sure
  21267. + * before releasing the tasklet lock, that the
  21268. + * SCHED_BIT is clear. Otherwise the tasklet
  21269. + * may get its SCHED_BIT set, but not added to the
21270. + * list.
  21271. + */
  21272. + if (!tasklet_tryunlock(t))
  21273. + goto again;
  21274. + }
  21275. + }
  21276. +}
  21277. +
  21278. void __tasklet_schedule(struct tasklet_struct *t)
  21279. {
  21280. unsigned long flags;
  21281. local_irq_save(flags);
  21282. - t->next = NULL;
  21283. - *__this_cpu_read(tasklet_vec.tail) = t;
  21284. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  21285. - raise_softirq_irqoff(TASKLET_SOFTIRQ);
  21286. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
  21287. local_irq_restore(flags);
  21288. }
  21289. EXPORT_SYMBOL(__tasklet_schedule);
  21290. @@ -464,10 +925,7 @@
  21291. unsigned long flags;
  21292. local_irq_save(flags);
  21293. - t->next = NULL;
  21294. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  21295. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  21296. - raise_softirq_irqoff(HI_SOFTIRQ);
  21297. + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
  21298. local_irq_restore(flags);
  21299. }
  21300. EXPORT_SYMBOL(__tasklet_hi_schedule);
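
__tasklet_common_schedule() above only queues the tasklet while holding the RUN bit (tasklet_trylock()) and only if the SCHED bit is still set, and it re-checks SCHED before dropping RUN. Below is a rough userspace analogue of that bit protocol, using C11 atomics as stand-ins for the kernel's test_and_set based helpers; the mini_* names, and the empty queue placeholder, are made up for illustration.

#include <stdatomic.h>
#include <stdbool.h>

#define T_SCHED (1u << 0)               /* models TASKLET_STATE_SCHED */
#define T_RUN   (1u << 1)               /* models TASKLET_STATE_RUN   */

struct mini_tasklet {
        _Atomic unsigned int state;
};

static bool mini_trylock(struct mini_tasklet *t)
{
        /* grab the RUN bit; fails if someone else is already running it */
        return !(atomic_fetch_or(&t->state, T_RUN) & T_RUN);
}

static bool mini_tryunlock(struct mini_tasklet *t)
{
        /* only allow the RUN -> 0 transition, i.e. SCHED must be clear */
        unsigned int expected = T_RUN;

        return atomic_compare_exchange_strong(&t->state, &expected, 0);
}

static void mini_queue(struct mini_tasklet *t)
{
        /* placeholder for adding the tasklet to the per-cpu list */
        (void)t;
}

static void mini_schedule(struct mini_tasklet *t)
{
        /* like tasklet_schedule(): only queue if SCHED was not already set */
        if (atomic_fetch_or(&t->state, T_SCHED) & T_SCHED)
                return;
        if (!mini_trylock(t))
                return;                 /* the running side re-checks SCHED */

        for (;;) {
                if (atomic_load(&t->state) & T_SCHED) {
                        mini_queue(t);                  /* queue while locked */
                        atomic_fetch_and(&t->state, ~T_RUN);
                        return;
                }
                /* SCHED raced in between the check and the unlock? retry */
                if (mini_tryunlock(t))
                        return;
        }
}
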
  21301. @@ -476,82 +934,122 @@
  21302. {
  21303. BUG_ON(!irqs_disabled());
  21304. - t->next = __this_cpu_read(tasklet_hi_vec.head);
  21305. - __this_cpu_write(tasklet_hi_vec.head, t);
  21306. - __raise_softirq_irqoff(HI_SOFTIRQ);
  21307. + __tasklet_hi_schedule(t);
  21308. }
  21309. EXPORT_SYMBOL(__tasklet_hi_schedule_first);
  21310. -static void tasklet_action(struct softirq_action *a)
  21311. +void tasklet_enable(struct tasklet_struct *t)
  21312. {
  21313. - struct tasklet_struct *list;
  21314. + if (!atomic_dec_and_test(&t->count))
  21315. + return;
  21316. + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state))
  21317. + tasklet_schedule(t);
  21318. +}
  21319. +EXPORT_SYMBOL(tasklet_enable);
  21320. - local_irq_disable();
  21321. - list = __this_cpu_read(tasklet_vec.head);
  21322. - __this_cpu_write(tasklet_vec.head, NULL);
  21323. - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  21324. - local_irq_enable();
  21325. +static void __tasklet_action(struct softirq_action *a,
  21326. + struct tasklet_struct *list)
  21327. +{
  21328. + int loops = 1000000;
  21329. while (list) {
  21330. struct tasklet_struct *t = list;
  21331. list = list->next;
  21332. - if (tasklet_trylock(t)) {
  21333. - if (!atomic_read(&t->count)) {
  21334. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  21335. - &t->state))
  21336. - BUG();
  21337. - t->func(t->data);
  21338. - tasklet_unlock(t);
  21339. - continue;
  21340. - }
  21341. - tasklet_unlock(t);
  21342. + /*
21343. + * Should always succeed - after a tasklet got on the
  21344. + * list (after getting the SCHED bit set from 0 to 1),
  21345. + * nothing but the tasklet softirq it got queued to can
  21346. + * lock it:
  21347. + */
  21348. + if (!tasklet_trylock(t)) {
  21349. + WARN_ON(1);
  21350. + continue;
  21351. }
  21352. - local_irq_disable();
  21353. t->next = NULL;
  21354. - *__this_cpu_read(tasklet_vec.tail) = t;
  21355. - __this_cpu_write(tasklet_vec.tail, &(t->next));
  21356. - __raise_softirq_irqoff(TASKLET_SOFTIRQ);
  21357. - local_irq_enable();
  21358. +
  21359. + /*
  21360. + * If we cannot handle the tasklet because it's disabled,
  21361. + * mark it as pending. tasklet_enable() will later
  21362. + * re-schedule the tasklet.
  21363. + */
  21364. + if (unlikely(atomic_read(&t->count))) {
  21365. +out_disabled:
  21366. + /* implicit unlock: */
  21367. + wmb();
  21368. + t->state = TASKLET_STATEF_PENDING;
  21369. + continue;
  21370. + }
  21371. +
  21372. + /*
21373. + * From this point on, the tasklet might be rescheduled
  21374. + * on another CPU, but it can only be added to another
  21375. + * CPU's tasklet list if we unlock the tasklet (which we
21376. + * don't do yet).
  21377. + */
  21378. + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  21379. + WARN_ON(1);
  21380. +
  21381. +again:
  21382. + t->func(t->data);
  21383. +
  21384. + /*
  21385. + * Try to unlock the tasklet. We must use cmpxchg, because
  21386. + * another CPU might have scheduled or disabled the tasklet.
  21387. + * We only allow the STATE_RUN -> 0 transition here.
  21388. + */
  21389. + while (!tasklet_tryunlock(t)) {
  21390. + /*
  21391. + * If it got disabled meanwhile, bail out:
  21392. + */
  21393. + if (atomic_read(&t->count))
  21394. + goto out_disabled;
  21395. + /*
  21396. + * If it got scheduled meanwhile, re-execute
  21397. + * the tasklet function:
  21398. + */
  21399. + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
  21400. + goto again;
  21401. + if (!--loops) {
  21402. + printk("hm, tasklet state: %08lx\n", t->state);
  21403. + WARN_ON(1);
  21404. + tasklet_unlock(t);
  21405. + break;
  21406. + }
  21407. + }
  21408. }
  21409. }
  21410. +static void tasklet_action(struct softirq_action *a)
  21411. +{
  21412. + struct tasklet_struct *list;
  21413. +
  21414. + local_irq_disable();
  21415. +
  21416. + list = __this_cpu_read(tasklet_vec.head);
  21417. + __this_cpu_write(tasklet_vec.head, NULL);
  21418. + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
  21419. +
  21420. + local_irq_enable();
  21421. +
  21422. + __tasklet_action(a, list);
  21423. +}
  21424. +
  21425. static void tasklet_hi_action(struct softirq_action *a)
  21426. {
  21427. struct tasklet_struct *list;
  21428. local_irq_disable();
  21429. +
  21430. list = __this_cpu_read(tasklet_hi_vec.head);
  21431. __this_cpu_write(tasklet_hi_vec.head, NULL);
  21432. __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
  21433. - local_irq_enable();
  21434. - while (list) {
  21435. - struct tasklet_struct *t = list;
  21436. -
  21437. - list = list->next;
  21438. -
  21439. - if (tasklet_trylock(t)) {
  21440. - if (!atomic_read(&t->count)) {
  21441. - if (!test_and_clear_bit(TASKLET_STATE_SCHED,
  21442. - &t->state))
  21443. - BUG();
  21444. - t->func(t->data);
  21445. - tasklet_unlock(t);
  21446. - continue;
  21447. - }
  21448. - tasklet_unlock(t);
  21449. - }
  21450. + local_irq_enable();
  21451. - local_irq_disable();
  21452. - t->next = NULL;
  21453. - *__this_cpu_read(tasklet_hi_vec.tail) = t;
  21454. - __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
  21455. - __raise_softirq_irqoff(HI_SOFTIRQ);
  21456. - local_irq_enable();
  21457. - }
  21458. + __tasklet_action(a, list);
  21459. }
  21460. void tasklet_init(struct tasklet_struct *t,
  21461. @@ -572,7 +1070,7 @@
  21462. while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
  21463. do {
  21464. - yield();
  21465. + msleep(1);
  21466. } while (test_bit(TASKLET_STATE_SCHED, &t->state));
  21467. }
  21468. tasklet_unlock_wait(t);
  21469. @@ -646,25 +1144,26 @@
  21470. open_softirq(HI_SOFTIRQ, tasklet_hi_action);
  21471. }
  21472. -static int ksoftirqd_should_run(unsigned int cpu)
  21473. -{
  21474. - return local_softirq_pending();
  21475. -}
  21476. -
  21477. -static void run_ksoftirqd(unsigned int cpu)
  21478. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  21479. +void tasklet_unlock_wait(struct tasklet_struct *t)
  21480. {
  21481. - local_irq_disable();
  21482. - if (local_softirq_pending()) {
  21483. + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
  21484. /*
  21485. - * We can safely run softirq on inline stack, as we are not deep
  21486. - * in the task stack here.
  21487. + * Hack for now to avoid this busy-loop:
  21488. */
  21489. - __do_softirq();
  21490. - local_irq_enable();
  21491. - cond_resched_rcu_qs();
  21492. - return;
  21493. +#ifdef CONFIG_PREEMPT_RT_FULL
  21494. + msleep(1);
  21495. +#else
  21496. + barrier();
  21497. +#endif
  21498. }
  21499. - local_irq_enable();
  21500. +}
  21501. +EXPORT_SYMBOL(tasklet_unlock_wait);
  21502. +#endif
  21503. +
  21504. +static int ksoftirqd_should_run(unsigned int cpu)
  21505. +{
  21506. + return ksoftirqd_softirq_pending();
  21507. }
  21508. #ifdef CONFIG_HOTPLUG_CPU
  21509. @@ -746,16 +1245,31 @@
  21510. static struct smp_hotplug_thread softirq_threads = {
  21511. .store = &ksoftirqd,
  21512. + .setup = ksoftirqd_set_sched_params,
  21513. .thread_should_run = ksoftirqd_should_run,
  21514. .thread_fn = run_ksoftirqd,
  21515. .thread_comm = "ksoftirqd/%u",
  21516. };
  21517. +#ifdef CONFIG_PREEMPT_RT_FULL
  21518. +static struct smp_hotplug_thread softirq_timer_threads = {
  21519. + .store = &ktimer_softirqd,
  21520. + .setup = ktimer_softirqd_set_sched_params,
  21521. + .cleanup = ktimer_softirqd_clr_sched_params,
  21522. + .thread_should_run = ktimer_softirqd_should_run,
  21523. + .thread_fn = run_ksoftirqd,
  21524. + .thread_comm = "ktimersoftd/%u",
  21525. +};
  21526. +#endif
  21527. +
  21528. static __init int spawn_ksoftirqd(void)
  21529. {
  21530. register_cpu_notifier(&cpu_nfb);
  21531. BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  21532. +#ifdef CONFIG_PREEMPT_RT_FULL
  21533. + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
  21534. +#endif
  21535. return 0;
  21536. }
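
With the hunks above, PREEMPT_RT_FULL ends up with two per-CPU softirq threads: ksoftirqd for most vectors and ktimersoftd (SCHED_FIFO) for the timer-class vectors, each draining only its own share of the raised mask. The sketch below is a hedged userspace analogue of that split, using POSIX threads and a single mutex/condvar instead of the kernel's smpboot and wakeup_proper_softirq() machinery; all names and vector numbers are illustrative.

#include <pthread.h>
#include <stdio.h>

#define TIMER_MASK ((1u << 1) | (1u << 8))      /* stand-in for TIMER_SOFTIRQS */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static unsigned int pending;                    /* shared "raised" mask */
static int done;

static void *worker(void *arg)
{
        unsigned int mine = *(unsigned int *)arg;

        pthread_mutex_lock(&lock);
        for (;;) {
                while (pending & mine) {
                        int nr = __builtin_ctz(pending & mine);

                        pending &= ~(1u << nr);
                        pthread_mutex_unlock(&lock);
                        printf("worker %#x handles vector %d\n", mine, nr);
                        pthread_mutex_lock(&lock);
                }
                if (done)
                        break;
                pthread_cond_wait(&cond, &lock);
        }
        pthread_mutex_unlock(&lock);
        return NULL;
}

static void raise_vec(unsigned int nr)
{
        pthread_mutex_lock(&lock);
        pending |= 1u << nr;
        pthread_cond_broadcast(&cond);  /* the kernel wakes only the matching thread */
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        unsigned int timer_share = TIMER_MASK;
        unsigned int other_share = ~TIMER_MASK;
        pthread_t t1, t2;

        pthread_create(&t1, NULL, worker, &timer_share);
        pthread_create(&t2, NULL, worker, &other_share);

        raise_vec(1);                   /* timer-class vector -> first worker  */
        raise_vec(3);                   /* anything else      -> second worker */

        pthread_mutex_lock(&lock);
        done = 1;
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
}
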
  21537. diff -Nur linux-4.1.39.orig/kernel/stop_machine.c linux-4.1.39/kernel/stop_machine.c
  21538. --- linux-4.1.39.orig/kernel/stop_machine.c 2017-03-13 21:04:36.000000000 +0100
  21539. +++ linux-4.1.39/kernel/stop_machine.c 2017-04-18 17:56:30.637398061 +0200
  21540. @@ -35,7 +35,7 @@
  21541. /* the actual stopper, one per every possible cpu, enabled on online cpus */
  21542. struct cpu_stopper {
  21543. - spinlock_t lock;
  21544. + raw_spinlock_t lock;
  21545. bool enabled; /* is this stopper enabled? */
  21546. struct list_head works; /* list of pending works */
  21547. };
  21548. @@ -78,7 +78,7 @@
  21549. unsigned long flags;
  21550. - spin_lock_irqsave(&stopper->lock, flags);
  21551. + raw_spin_lock_irqsave(&stopper->lock, flags);
  21552. if (stopper->enabled) {
  21553. list_add_tail(&work->list, &stopper->works);
  21554. @@ -86,7 +86,7 @@
  21555. } else
  21556. cpu_stop_signal_done(work->done, false);
  21557. - spin_unlock_irqrestore(&stopper->lock, flags);
  21558. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  21559. }
  21560. /**
  21561. @@ -248,7 +248,7 @@
  21562. struct irq_cpu_stop_queue_work_info call_args;
  21563. struct multi_stop_data msdata;
  21564. - preempt_disable();
  21565. + preempt_disable_nort();
  21566. msdata = (struct multi_stop_data){
  21567. .fn = fn,
  21568. .data = arg,
  21569. @@ -281,7 +281,7 @@
  21570. * This relies on the stopper workqueues to be FIFO.
  21571. */
  21572. if (!cpu_active(cpu1) || !cpu_active(cpu2)) {
  21573. - preempt_enable();
  21574. + preempt_enable_nort();
  21575. return -ENOENT;
  21576. }
  21577. @@ -295,7 +295,7 @@
  21578. &irq_cpu_stop_queue_work,
  21579. &call_args, 1);
  21580. lg_local_unlock(&stop_cpus_lock);
  21581. - preempt_enable();
  21582. + preempt_enable_nort();
  21583. wait_for_completion(&done.completion);
  21584. @@ -329,7 +329,7 @@
  21585. static void queue_stop_cpus_work(const struct cpumask *cpumask,
  21586. cpu_stop_fn_t fn, void *arg,
  21587. - struct cpu_stop_done *done)
  21588. + struct cpu_stop_done *done, bool inactive)
  21589. {
  21590. struct cpu_stop_work *work;
  21591. unsigned int cpu;
  21592. @@ -343,11 +343,13 @@
  21593. }
  21594. /*
  21595. - * Disable preemption while queueing to avoid getting
  21596. - * preempted by a stopper which might wait for other stoppers
  21597. - * to enter @fn which can lead to deadlock.
  21598. + * Make sure that all work is queued on all cpus before
  21599. + * any of the cpus can execute it.
  21600. */
  21601. - lg_global_lock(&stop_cpus_lock);
  21602. + if (!inactive)
  21603. + lg_global_lock(&stop_cpus_lock);
  21604. + else
  21605. + lg_global_trylock_relax(&stop_cpus_lock);
  21606. for_each_cpu(cpu, cpumask)
  21607. cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
  21608. lg_global_unlock(&stop_cpus_lock);
  21609. @@ -359,7 +361,7 @@
  21610. struct cpu_stop_done done;
  21611. cpu_stop_init_done(&done, cpumask_weight(cpumask));
  21612. - queue_stop_cpus_work(cpumask, fn, arg, &done);
  21613. + queue_stop_cpus_work(cpumask, fn, arg, &done, false);
  21614. wait_for_completion(&done.completion);
  21615. return done.executed ? done.ret : -ENOENT;
  21616. }
  21617. @@ -439,9 +441,9 @@
  21618. unsigned long flags;
  21619. int run;
  21620. - spin_lock_irqsave(&stopper->lock, flags);
  21621. + raw_spin_lock_irqsave(&stopper->lock, flags);
  21622. run = !list_empty(&stopper->works);
  21623. - spin_unlock_irqrestore(&stopper->lock, flags);
  21624. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  21625. return run;
  21626. }
  21627. @@ -453,13 +455,13 @@
  21628. repeat:
  21629. work = NULL;
  21630. - spin_lock_irq(&stopper->lock);
  21631. + raw_spin_lock_irq(&stopper->lock);
  21632. if (!list_empty(&stopper->works)) {
  21633. work = list_first_entry(&stopper->works,
  21634. struct cpu_stop_work, list);
  21635. list_del_init(&work->list);
  21636. }
  21637. - spin_unlock_irq(&stopper->lock);
  21638. + raw_spin_unlock_irq(&stopper->lock);
  21639. if (work) {
  21640. cpu_stop_fn_t fn = work->fn;
  21641. @@ -467,6 +469,16 @@
  21642. struct cpu_stop_done *done = work->done;
  21643. char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
  21644. + /*
  21645. + * Wait until the stopper finished scheduling on all
  21646. + * cpus
  21647. + */
  21648. + lg_global_lock(&stop_cpus_lock);
  21649. + /*
  21650. + * Let other cpu threads continue as well
  21651. + */
  21652. + lg_global_unlock(&stop_cpus_lock);
  21653. +
  21654. /* cpu stop callbacks are not allowed to sleep */
  21655. preempt_disable();
  21656. @@ -500,20 +512,20 @@
  21657. unsigned long flags;
  21658. /* drain remaining works */
  21659. - spin_lock_irqsave(&stopper->lock, flags);
  21660. + raw_spin_lock_irqsave(&stopper->lock, flags);
  21661. list_for_each_entry(work, &stopper->works, list)
  21662. cpu_stop_signal_done(work->done, false);
  21663. stopper->enabled = false;
  21664. - spin_unlock_irqrestore(&stopper->lock, flags);
  21665. + raw_spin_unlock_irqrestore(&stopper->lock, flags);
  21666. }
  21667. static void cpu_stop_unpark(unsigned int cpu)
  21668. {
  21669. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  21670. - spin_lock_irq(&stopper->lock);
  21671. + raw_spin_lock_irq(&stopper->lock);
  21672. stopper->enabled = true;
  21673. - spin_unlock_irq(&stopper->lock);
  21674. + raw_spin_unlock_irq(&stopper->lock);
  21675. }
  21676. static struct smp_hotplug_thread cpu_stop_threads = {
  21677. @@ -535,10 +547,12 @@
  21678. for_each_possible_cpu(cpu) {
  21679. struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
  21680. - spin_lock_init(&stopper->lock);
  21681. + raw_spin_lock_init(&stopper->lock);
  21682. INIT_LIST_HEAD(&stopper->works);
  21683. }
  21684. + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock");
  21685. +
  21686. BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
  21687. stop_machine_initialized = true;
  21688. return 0;
  21689. @@ -634,7 +648,7 @@
  21690. set_state(&msdata, MULTI_STOP_PREPARE);
  21691. cpu_stop_init_done(&done, num_active_cpus());
  21692. queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
  21693. - &done);
  21694. + &done, true);
  21695. ret = multi_cpu_stop(&msdata);
  21696. /* Busy wait for completion. */
  21697. diff -Nur linux-4.1.39.orig/kernel/time/hrtimer.c linux-4.1.39/kernel/time/hrtimer.c
  21698. --- linux-4.1.39.orig/kernel/time/hrtimer.c 2017-03-13 21:04:36.000000000 +0100
  21699. +++ linux-4.1.39/kernel/time/hrtimer.c 2017-04-18 17:56:30.637398061 +0200
  21700. @@ -48,11 +48,13 @@
  21701. #include <linux/sched/rt.h>
  21702. #include <linux/sched/deadline.h>
  21703. #include <linux/timer.h>
  21704. +#include <linux/kthread.h>
  21705. #include <linux/freezer.h>
  21706. #include <asm/uaccess.h>
  21707. #include <trace/events/timer.h>
  21708. +#include <trace/events/hist.h>
  21709. #include "tick-internal.h"
  21710. @@ -576,8 +578,7 @@
  21711. * When the callback is running, we do not reprogram the clock event
  21712. * device. The timer callback is either running on a different CPU or
  21713. * the callback is executed in the hrtimer_interrupt context. The
  21714. - * reprogramming is handled either by the softirq, which called the
  21715. - * callback or at the end of the hrtimer_interrupt.
  21716. + * reprogramming is handled at the end of the hrtimer_interrupt.
  21717. */
  21718. if (hrtimer_callback_running(timer))
  21719. return 0;
  21720. @@ -621,6 +622,9 @@
  21721. return res;
  21722. }
  21723. +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
  21724. +static int hrtimer_rt_defer(struct hrtimer *timer);
  21725. +
  21726. /*
  21727. * Initialize the high resolution related parts of cpu_base
  21728. */
  21729. @@ -630,6 +634,21 @@
  21730. base->hres_active = 0;
  21731. }
  21732. +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  21733. + struct hrtimer_clock_base *base,
  21734. + int wakeup)
  21735. +{
  21736. + if (!hrtimer_reprogram(timer, base))
  21737. + return 0;
  21738. + if (!wakeup)
  21739. + return -ETIME;
  21740. +#ifdef CONFIG_PREEMPT_RT_BASE
  21741. + if (!hrtimer_rt_defer(timer))
  21742. + return -ETIME;
  21743. +#endif
  21744. + return 1;
  21745. +}
  21746. +
  21747. static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
  21748. {
  21749. ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
  21750. @@ -695,6 +714,44 @@
  21751. static DECLARE_WORK(hrtimer_work, clock_was_set_work);
  21752. +#ifdef CONFIG_PREEMPT_RT_FULL
  21753. +/*
21754. + * RT cannot call schedule_work from real interrupt context.
  21755. + * Need to make a thread to do the real work.
  21756. + */
  21757. +static struct task_struct *clock_set_delay_thread;
  21758. +static bool do_clock_set_delay;
  21759. +
  21760. +static int run_clock_set_delay(void *ignore)
  21761. +{
  21762. + while (!kthread_should_stop()) {
  21763. + set_current_state(TASK_INTERRUPTIBLE);
  21764. + if (do_clock_set_delay) {
  21765. + do_clock_set_delay = false;
  21766. + schedule_work(&hrtimer_work);
  21767. + }
  21768. + schedule();
  21769. + }
  21770. + __set_current_state(TASK_RUNNING);
  21771. + return 0;
  21772. +}
  21773. +
  21774. +void clock_was_set_delayed(void)
  21775. +{
  21776. + do_clock_set_delay = true;
  21777. + /* Make visible before waking up process */
  21778. + smp_wmb();
  21779. + wake_up_process(clock_set_delay_thread);
  21780. +}
  21781. +
  21782. +static __init int create_clock_set_delay_thread(void)
  21783. +{
  21784. + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
  21785. + BUG_ON(!clock_set_delay_thread);
  21786. + return 0;
  21787. +}
  21788. +early_initcall(create_clock_set_delay_thread);
  21789. +#else /* PREEMPT_RT_FULL */
  21790. /*
  21791. * Called from timekeeping and resume code to reprogramm the hrtimer
  21792. * interrupt device on all cpus.
  21793. @@ -703,6 +760,7 @@
  21794. {
  21795. schedule_work(&hrtimer_work);
  21796. }
  21797. +#endif
  21798. #else
  21799. @@ -711,6 +769,13 @@
  21800. static inline int hrtimer_switch_to_hres(void) { return 0; }
  21801. static inline void
  21802. hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
  21803. +static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
  21804. + struct hrtimer_clock_base *base,
  21805. + int wakeup)
  21806. +{
  21807. + return 0;
  21808. +}
  21809. +
  21810. static inline int hrtimer_reprogram(struct hrtimer *timer,
  21811. struct hrtimer_clock_base *base)
  21812. {
  21813. @@ -718,7 +783,6 @@
  21814. }
  21815. static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
  21816. static inline void retrigger_next_event(void *arg) { }
  21817. -
  21818. #endif /* CONFIG_HIGH_RES_TIMERS */
  21819. /*
  21820. @@ -836,6 +900,32 @@
  21821. }
  21822. EXPORT_SYMBOL_GPL(hrtimer_forward);
  21823. +#ifdef CONFIG_PREEMPT_RT_BASE
  21824. +# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
  21825. +
  21826. +/**
  21827. + * hrtimer_wait_for_timer - Wait for a running timer
  21828. + *
  21829. + * @timer: timer to wait for
  21830. + *
21831. + * The function waits on the waitqueue of the timer base in case
21832. + * the timer's callback function is currently executing. The
  21833. + * waitqueue is woken up after the timer callback function has
  21834. + * finished execution.
  21835. + */
  21836. +void hrtimer_wait_for_timer(const struct hrtimer *timer)
  21837. +{
  21838. + struct hrtimer_clock_base *base = timer->base;
  21839. +
  21840. + if (base && base->cpu_base && !timer->irqsafe)
  21841. + wait_event(base->cpu_base->wait,
  21842. + !(timer->state & HRTIMER_STATE_CALLBACK));
  21843. +}
  21844. +
  21845. +#else
  21846. +# define wake_up_timer_waiters(b) do { } while (0)
  21847. +#endif
  21848. +
  21849. /*
  21850. * enqueue_hrtimer - internal function to (re)start a timer
  21851. *
  21852. @@ -879,6 +969,11 @@
  21853. if (!(timer->state & HRTIMER_STATE_ENQUEUED))
  21854. goto out;
  21855. + if (unlikely(!list_empty(&timer->cb_entry))) {
  21856. + list_del_init(&timer->cb_entry);
  21857. + goto out;
  21858. + }
  21859. +
  21860. next_timer = timerqueue_getnext(&base->active);
  21861. timerqueue_del(&base->active, &timer->node);
  21862. if (&timer->node == next_timer) {
  21863. @@ -966,7 +1061,16 @@
  21864. new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
  21865. timer_stats_hrtimer_set_start_info(timer);
  21866. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  21867. + {
  21868. + ktime_t now = new_base->get_time();
  21869. + if (ktime_to_ns(tim) < ktime_to_ns(now))
  21870. + timer->praecox = now;
  21871. + else
  21872. + timer->praecox = ktime_set(0, 0);
  21873. + }
  21874. +#endif
  21875. leftmost = enqueue_hrtimer(timer, new_base);
  21876. if (!leftmost) {
  21877. @@ -980,15 +1084,26 @@
  21878. * on dynticks target.
  21879. */
  21880. wake_up_nohz_cpu(new_base->cpu_base->cpu);
  21881. - } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) &&
  21882. - hrtimer_reprogram(timer, new_base)) {
  21883. + } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases)) {
  21884. +
  21885. + ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
  21886. + if (ret < 0) {
  21887. + /*
  21888. + * In case we failed to reprogram the timer (mostly
21889. + * because our current timer has already elapsed),
  21890. + * remove it again and report a failure. This avoids
  21891. + * stale base->first entries.
  21892. + */
  21893. + debug_deactivate(timer);
  21894. + __remove_hrtimer(timer, new_base,
  21895. + timer->state & HRTIMER_STATE_CALLBACK, 0);
  21896. + } else if (ret > 0) {
  21897. /*
  21898. * Only allow reprogramming if the new base is on this CPU.
  21899. * (it might still be on another CPU if the timer was pending)
  21900. *
  21901. * XXX send_remote_softirq() ?
  21902. */
  21903. - if (wakeup) {
  21904. /*
  21905. * We need to drop cpu_base->lock to avoid a
  21906. * lock ordering issue vs. rq->lock.
  21907. @@ -996,9 +1111,7 @@
  21908. raw_spin_unlock(&new_base->cpu_base->lock);
  21909. raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  21910. local_irq_restore(flags);
  21911. - return ret;
  21912. - } else {
  21913. - __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  21914. + return 0;
  21915. }
  21916. }
  21917. @@ -1089,7 +1202,7 @@
  21918. if (ret >= 0)
  21919. return ret;
  21920. - cpu_relax();
  21921. + hrtimer_wait_for_timer(timer);
  21922. }
  21923. }
  21924. EXPORT_SYMBOL_GPL(hrtimer_cancel);
  21925. @@ -1153,6 +1266,7 @@
  21926. base = hrtimer_clockid_to_base(clock_id);
  21927. timer->base = &cpu_base->clock_base[base];
  21928. + INIT_LIST_HEAD(&timer->cb_entry);
  21929. timerqueue_init(&timer->node);
  21930. #ifdef CONFIG_TIMER_STATS
  21931. @@ -1236,6 +1350,126 @@
  21932. timer->state &= ~HRTIMER_STATE_CALLBACK;
  21933. }
  21934. +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
  21935. +
  21936. +#ifdef CONFIG_PREEMPT_RT_BASE
  21937. +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
  21938. + struct hrtimer_clock_base *base)
  21939. +{
  21940. + /*
  21941. + * Note, we clear the callback flag before we requeue the
  21942. + * timer otherwise we trigger the callback_running() check
  21943. + * in hrtimer_reprogram().
  21944. + */
  21945. + timer->state &= ~HRTIMER_STATE_CALLBACK;
  21946. +
  21947. + if (restart != HRTIMER_NORESTART) {
  21948. + BUG_ON(hrtimer_active(timer));
  21949. + /*
21950. + * Enqueue the timer; if it's the leftmost timer then
  21951. + * we need to reprogram it.
  21952. + */
  21953. + if (!enqueue_hrtimer(timer, base))
  21954. + return;
  21955. +
  21956. +#ifndef CONFIG_HIGH_RES_TIMERS
  21957. + }
  21958. +#else
  21959. + if (base->cpu_base->hres_active &&
  21960. + hrtimer_reprogram(timer, base))
  21961. + goto requeue;
  21962. +
  21963. + } else if (hrtimer_active(timer)) {
  21964. + /*
  21965. + * If the timer was rearmed on another CPU, reprogram
  21966. + * the event device.
  21967. + */
  21968. + if (&timer->node == base->active.next &&
  21969. + base->cpu_base->hres_active &&
  21970. + hrtimer_reprogram(timer, base))
  21971. + goto requeue;
  21972. + }
  21973. + return;
  21974. +
  21975. +requeue:
  21976. + /*
  21977. + * Timer is expired. Thus move it from tree to pending list
  21978. + * again.
  21979. + */
  21980. + __remove_hrtimer(timer, base, timer->state, 0);
  21981. + list_add_tail(&timer->cb_entry, &base->expired);
  21982. +#endif
  21983. +}
  21984. +
  21985. +/*
  21986. + * The changes in mainline which removed the callback modes from
  21987. + * hrtimer are not yet working with -rt. The non wakeup_process()
  21988. + * based callbacks which involve sleeping locks need to be treated
21989. + * separately.
  21990. + */
  21991. +static void hrtimer_rt_run_pending(void)
  21992. +{
  21993. + enum hrtimer_restart (*fn)(struct hrtimer *);
  21994. + struct hrtimer_cpu_base *cpu_base;
  21995. + struct hrtimer_clock_base *base;
  21996. + struct hrtimer *timer;
  21997. + int index, restart;
  21998. +
  21999. + local_irq_disable();
  22000. + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
  22001. +
  22002. + raw_spin_lock(&cpu_base->lock);
  22003. +
  22004. + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
  22005. + base = &cpu_base->clock_base[index];
  22006. +
  22007. + while (!list_empty(&base->expired)) {
  22008. + timer = list_first_entry(&base->expired,
  22009. + struct hrtimer, cb_entry);
  22010. +
  22011. + /*
22012. + * Same as the above __run_hrtimer function,
22013. + * except that we run with interrupts enabled.
  22014. + */
  22015. + debug_hrtimer_deactivate(timer);
  22016. + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
  22017. + timer_stats_account_hrtimer(timer);
  22018. + fn = timer->function;
  22019. +
  22020. + raw_spin_unlock_irq(&cpu_base->lock);
  22021. + restart = fn(timer);
  22022. + raw_spin_lock_irq(&cpu_base->lock);
  22023. +
  22024. + hrtimer_rt_reprogram(restart, timer, base);
  22025. + }
  22026. + }
  22027. +
  22028. + raw_spin_unlock_irq(&cpu_base->lock);
  22029. +
  22030. + wake_up_timer_waiters(cpu_base);
  22031. +}
  22032. +
  22033. +static int hrtimer_rt_defer(struct hrtimer *timer)
  22034. +{
  22035. + if (timer->irqsafe)
  22036. + return 0;
  22037. +
  22038. + __remove_hrtimer(timer, timer->base, timer->state, 0);
  22039. + list_add_tail(&timer->cb_entry, &timer->base->expired);
  22040. + return 1;
  22041. +}
  22042. +
  22043. +#else
  22044. +
  22045. +static inline void hrtimer_rt_run_pending(void)
  22046. +{
  22047. + hrtimer_peek_ahead_timers();
  22048. +}
  22049. +
  22050. +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
  22051. +
  22052. +#endif
  22053. +
  22054. #ifdef CONFIG_HIGH_RES_TIMERS
  22055. /*
  22056. @@ -1246,7 +1480,7 @@
  22057. {
  22058. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  22059. ktime_t expires_next, now, entry_time, delta;
  22060. - int i, retries = 0;
  22061. + int i, retries = 0, raise = 0;
  22062. BUG_ON(!cpu_base->hres_active);
  22063. cpu_base->nr_events++;
  22064. @@ -1281,6 +1515,15 @@
  22065. timer = container_of(node, struct hrtimer, node);
  22066. + trace_hrtimer_interrupt(raw_smp_processor_id(),
  22067. + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ?
  22068. + timer->praecox : hrtimer_get_expires(timer),
  22069. + basenow)),
  22070. + current,
  22071. + timer->function == hrtimer_wakeup ?
  22072. + container_of(timer, struct hrtimer_sleeper,
  22073. + timer)->task : NULL);
  22074. +
  22075. /*
  22076. * The immediate goal for using the softexpires is
  22077. * minimizing wakeups, not running timers at the
  22078. @@ -1296,7 +1539,10 @@
  22079. if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
  22080. break;
  22081. - __run_hrtimer(timer, &basenow);
  22082. + if (!hrtimer_rt_defer(timer))
  22083. + __run_hrtimer(timer, &basenow);
  22084. + else
  22085. + raise = 1;
  22086. }
  22087. }
  22088. /* Reevaluate the clock bases for the next expiry */
  22089. @@ -1313,7 +1559,7 @@
  22090. if (expires_next.tv64 == KTIME_MAX ||
  22091. !tick_program_event(expires_next, 0)) {
  22092. cpu_base->hang_detected = 0;
  22093. - return;
  22094. + goto out;
  22095. }
  22096. /*
  22097. @@ -1357,6 +1603,9 @@
  22098. tick_program_event(expires_next, 1);
  22099. printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
  22100. ktime_to_ns(delta));
  22101. +out:
  22102. + if (raise)
  22103. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  22104. }
  22105. /*
  22106. @@ -1392,18 +1641,18 @@
  22107. __hrtimer_peek_ahead_timers();
  22108. local_irq_restore(flags);
  22109. }
  22110. -
  22111. -static void run_hrtimer_softirq(struct softirq_action *h)
  22112. -{
  22113. - hrtimer_peek_ahead_timers();
  22114. -}
  22115. -
  22116. #else /* CONFIG_HIGH_RES_TIMERS */
  22117. static inline void __hrtimer_peek_ahead_timers(void) { }
  22118. #endif /* !CONFIG_HIGH_RES_TIMERS */
  22119. +
  22120. +static void run_hrtimer_softirq(struct softirq_action *h)
  22121. +{
  22122. + hrtimer_rt_run_pending();
  22123. +}
  22124. +
  22125. /*
  22126. * Called from timer softirq every jiffy, expire hrtimers:
  22127. *
  22128. @@ -1436,7 +1685,7 @@
  22129. struct timerqueue_node *node;
  22130. struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
  22131. struct hrtimer_clock_base *base;
  22132. - int index, gettime = 1;
  22133. + int index, gettime = 1, raise = 0;
  22134. if (hrtimer_hres_active())
  22135. return;
  22136. @@ -1461,10 +1710,16 @@
  22137. hrtimer_get_expires_tv64(timer))
  22138. break;
  22139. - __run_hrtimer(timer, &base->softirq_time);
  22140. + if (!hrtimer_rt_defer(timer))
  22141. + __run_hrtimer(timer, &base->softirq_time);
  22142. + else
  22143. + raise = 1;
  22144. }
  22145. raw_spin_unlock(&cpu_base->lock);
  22146. }
  22147. +
  22148. + if (raise)
  22149. + raise_softirq_irqoff(HRTIMER_SOFTIRQ);
  22150. }
  22151. /*
  22152. @@ -1486,16 +1741,18 @@
  22153. void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
  22154. {
  22155. sl->timer.function = hrtimer_wakeup;
  22156. + sl->timer.irqsafe = 1;
  22157. sl->task = task;
  22158. }
  22159. EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
  22160. -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
  22161. +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
  22162. + unsigned long state)
  22163. {
  22164. hrtimer_init_sleeper(t, current);
  22165. do {
  22166. - set_current_state(TASK_INTERRUPTIBLE);
  22167. + set_current_state(state);
  22168. hrtimer_start_expires(&t->timer, mode);
  22169. if (!hrtimer_active(&t->timer))
  22170. t->task = NULL;
  22171. @@ -1539,7 +1796,8 @@
  22172. HRTIMER_MODE_ABS);
  22173. hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
  22174. - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
  22175. + /* cpu_chill() does not care about restart state. */
  22176. + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
  22177. goto out;
  22178. rmtp = restart->nanosleep.rmtp;
  22179. @@ -1556,8 +1814,10 @@
  22180. return ret;
  22181. }
  22182. -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  22183. - const enum hrtimer_mode mode, const clockid_t clockid)
  22184. +static long
  22185. +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  22186. + const enum hrtimer_mode mode, const clockid_t clockid,
  22187. + unsigned long state)
  22188. {
  22189. struct restart_block *restart;
  22190. struct hrtimer_sleeper t;
  22191. @@ -1570,7 +1830,7 @@
  22192. hrtimer_init_on_stack(&t.timer, clockid, mode);
  22193. hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
  22194. - if (do_nanosleep(&t, mode))
  22195. + if (do_nanosleep(&t, mode, state))
  22196. goto out;
  22197. /* Absolute timers do not update the rmtp value and restart: */
  22198. @@ -1597,6 +1857,12 @@
  22199. return ret;
  22200. }
  22201. +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
  22202. + const enum hrtimer_mode mode, const clockid_t clockid)
  22203. +{
  22204. + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
  22205. +}
  22206. +
  22207. SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
  22208. struct timespec __user *, rmtp)
  22209. {
  22210. @@ -1611,6 +1877,26 @@
  22211. return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
  22212. }
  22213. +#ifdef CONFIG_PREEMPT_RT_FULL
  22214. +/*
22215. + * Sleep for 1 ms in the hope that whoever holds what we want will let it go.
  22216. + */
  22217. +void cpu_chill(void)
  22218. +{
  22219. + struct timespec tu = {
  22220. + .tv_nsec = NSEC_PER_MSEC,
  22221. + };
  22222. + unsigned int freeze_flag = current->flags & PF_NOFREEZE;
  22223. +
  22224. + current->flags |= PF_NOFREEZE;
  22225. + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
  22226. + TASK_UNINTERRUPTIBLE);
  22227. + if (!freeze_flag)
  22228. + current->flags &= ~PF_NOFREEZE;
  22229. +}
  22230. +EXPORT_SYMBOL(cpu_chill);
  22231. +#endif
  22232. +
  22233. /*
  22234. * Functions related to boot-time initialization:
  22235. */
  22236. @@ -1622,10 +1908,14 @@
  22237. for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
  22238. cpu_base->clock_base[i].cpu_base = cpu_base;
  22239. timerqueue_init_head(&cpu_base->clock_base[i].active);
  22240. + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
  22241. }
  22242. cpu_base->cpu = cpu;
  22243. hrtimer_init_hres(cpu_base);
  22244. +#ifdef CONFIG_PREEMPT_RT_BASE
  22245. + init_waitqueue_head(&cpu_base->wait);
  22246. +#endif
  22247. }
  22248. #ifdef CONFIG_HOTPLUG_CPU
  22249. @@ -1731,9 +2021,7 @@
  22250. hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
  22251. (void *)(long)smp_processor_id());
  22252. register_cpu_notifier(&hrtimers_nb);
  22253. -#ifdef CONFIG_HIGH_RES_TIMERS
  22254. open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
  22255. -#endif
  22256. }
  22257. /**
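
Several hunks above replace busy-waiting on a running timer callback or lock holder (cpu_relax(), yield()) with sleeping for a millisecond (hrtimer_wait_for_timer(), msleep(1), cpu_chill()), so that on RT the preempted owner can actually make progress. A minimal userspace sketch of that retry-then-sleep pattern follows; the try_cancel callback is a hypothetical stand-in for hrtimer_try_to_cancel().

#include <stdbool.h>
#include <time.h>

static void chill_1ms(void)
{
        /* give the callback/lock owner a chance to run instead of spinning */
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };

        nanosleep(&ts, NULL);
}

/* try_cancel() returns false while the callback is still running elsewhere */
static void cancel_with_chill(bool (*try_cancel)(void *), void *timer)
{
        while (!try_cancel(timer))
                chill_1ms();
}

The kernel version additionally sets PF_NOFREEZE around the sleep (see cpu_chill() above) so the freezer cannot trap the task mid-chill.
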
  22258. diff -Nur linux-4.1.39.orig/kernel/time/itimer.c linux-4.1.39/kernel/time/itimer.c
  22259. --- linux-4.1.39.orig/kernel/time/itimer.c 2017-03-13 21:04:36.000000000 +0100
  22260. +++ linux-4.1.39/kernel/time/itimer.c 2017-04-18 17:56:30.637398061 +0200
  22261. @@ -213,6 +213,7 @@
  22262. /* We are sharing ->siglock with it_real_fn() */
  22263. if (hrtimer_try_to_cancel(timer) < 0) {
  22264. spin_unlock_irq(&tsk->sighand->siglock);
  22265. + hrtimer_wait_for_timer(&tsk->signal->real_timer);
  22266. goto again;
  22267. }
  22268. expires = timeval_to_ktime(value->it_value);
  22269. diff -Nur linux-4.1.39.orig/kernel/time/jiffies.c linux-4.1.39/kernel/time/jiffies.c
  22270. --- linux-4.1.39.orig/kernel/time/jiffies.c 2017-03-13 21:04:36.000000000 +0100
  22271. +++ linux-4.1.39/kernel/time/jiffies.c 2017-04-18 17:56:30.637398061 +0200
  22272. @@ -74,7 +74,8 @@
  22273. .max_cycles = 10,
  22274. };
  22275. -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
  22276. +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
  22277. +__cacheline_aligned_in_smp seqcount_t jiffies_seq;
  22278. #if (BITS_PER_LONG < 64)
  22279. u64 get_jiffies_64(void)
  22280. @@ -83,9 +84,9 @@
  22281. u64 ret;
  22282. do {
  22283. - seq = read_seqbegin(&jiffies_lock);
  22284. + seq = read_seqcount_begin(&jiffies_seq);
  22285. ret = jiffies_64;
  22286. - } while (read_seqretry(&jiffies_lock, seq));
  22287. + } while (read_seqcount_retry(&jiffies_seq, seq));
  22288. return ret;
  22289. }
  22290. EXPORT_SYMBOL(get_jiffies_64);
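
The jiffies hunk above splits the seqlock into a raw spinlock for writers plus a bare seqcount, and get_jiffies_64() retries its read while the sequence is odd or has changed. Below is a small userspace sketch of that reader/writer discipline, with sequentially consistent C11 atomics standing in for the kernel seqcount helpers; the names are illustrative only.

#include <stdatomic.h>
#include <stdint.h>

static _Atomic unsigned int seq;        /* even: stable, odd: writer active */
static _Atomic uint64_t counter;        /* the 64-bit value being published */

static void writer_update(uint64_t v)   /* callers serialize writes, as jiffies_lock does */
{
        atomic_fetch_add(&seq, 1);      /* begin: sequence becomes odd   */
        atomic_store(&counter, v);
        atomic_fetch_add(&seq, 1);      /* end:   sequence is even again */
}

static uint64_t reader_get(void)
{
        unsigned int s;
        uint64_t v;

        do {
                s = atomic_load(&seq);
                v = atomic_load(&counter);
        } while ((s & 1) || s != atomic_load(&seq));

        return v;
}

Writers still need mutual exclusion among themselves, which is what the new raw_spinlock_t jiffies_lock provides in the hunk above.
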
  22291. diff -Nur linux-4.1.39.orig/kernel/time/ntp.c linux-4.1.39/kernel/time/ntp.c
  22292. --- linux-4.1.39.orig/kernel/time/ntp.c 2017-03-13 21:04:36.000000000 +0100
  22293. +++ linux-4.1.39/kernel/time/ntp.c 2017-04-18 17:56:30.637398061 +0200
  22294. @@ -10,6 +10,7 @@
  22295. #include <linux/workqueue.h>
  22296. #include <linux/hrtimer.h>
  22297. #include <linux/jiffies.h>
  22298. +#include <linux/kthread.h>
  22299. #include <linux/math64.h>
  22300. #include <linux/timex.h>
  22301. #include <linux/time.h>
  22302. @@ -529,10 +530,52 @@
  22303. &sync_cmos_work, timespec_to_jiffies(&next));
  22304. }
  22305. +#ifdef CONFIG_PREEMPT_RT_FULL
  22306. +/*
22307. + * RT cannot call schedule_delayed_work from real interrupt context.
  22308. + * Need to make a thread to do the real work.
  22309. + */
  22310. +static struct task_struct *cmos_delay_thread;
  22311. +static bool do_cmos_delay;
  22312. +
  22313. +static int run_cmos_delay(void *ignore)
  22314. +{
  22315. + while (!kthread_should_stop()) {
  22316. + set_current_state(TASK_INTERRUPTIBLE);
  22317. + if (do_cmos_delay) {
  22318. + do_cmos_delay = false;
  22319. + queue_delayed_work(system_power_efficient_wq,
  22320. + &sync_cmos_work, 0);
  22321. + }
  22322. + schedule();
  22323. + }
  22324. + __set_current_state(TASK_RUNNING);
  22325. + return 0;
  22326. +}
  22327. +
  22328. +void ntp_notify_cmos_timer(void)
  22329. +{
  22330. + do_cmos_delay = true;
  22331. + /* Make visible before waking up process */
  22332. + smp_wmb();
  22333. + wake_up_process(cmos_delay_thread);
  22334. +}
  22335. +
  22336. +static __init int create_cmos_delay_thread(void)
  22337. +{
  22338. + cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd");
  22339. + BUG_ON(!cmos_delay_thread);
  22340. + return 0;
  22341. +}
  22342. +early_initcall(create_cmos_delay_thread);
  22343. +
  22344. +#else
  22345. +
  22346. void ntp_notify_cmos_timer(void)
  22347. {
  22348. queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
  22349. }
  22350. +#endif /* CONFIG_PREEMPT_RT_FULL */
  22351. #else
  22352. void ntp_notify_cmos_timer(void) { }
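
On RT, ntp_notify_cmos_timer() above (like clock_was_set_delayed() in the hrtimer hunk) cannot call queue_delayed_work()/schedule_work() from hard interrupt context, so it only sets a flag and wakes a dedicated kthread that does the queueing. The following is a rough userspace analogue of that hand-off, using a condvar-protected flag in place of wake_up_process(); the names are made up for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  c = PTHREAD_COND_INITIALIZER;
static bool work_requested;
static bool stopping;

static void *helper_thread(void *unused)
{
        (void)unused;
        pthread_mutex_lock(&m);
        for (;;) {
                while (!work_requested && !stopping)
                        pthread_cond_wait(&c, &m);
                if (work_requested) {
                        work_requested = false;
                        pthread_mutex_unlock(&m);
                        printf("helper: queueing the CMOS sync work (may sleep)\n");
                        pthread_mutex_lock(&m);
                        continue;
                }
                break;                  /* stopping and nothing left to do */
        }
        pthread_mutex_unlock(&m);
        return NULL;
}

/* fast path: only record the request and wake the helper */
static void notify_cmos_timer(void)
{
        pthread_mutex_lock(&m);
        work_requested = true;
        pthread_cond_signal(&c);
        pthread_mutex_unlock(&m);
}

int main(void)
{
        pthread_t tid;

        pthread_create(&tid, NULL, helper_thread, NULL);
        notify_cmos_timer();

        pthread_mutex_lock(&m);
        stopping = true;
        pthread_cond_signal(&c);
        pthread_mutex_unlock(&m);
        pthread_join(tid, NULL);
        return 0;
}
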
  22353. diff -Nur linux-4.1.39.orig/kernel/time/posix-cpu-timers.c linux-4.1.39/kernel/time/posix-cpu-timers.c
  22354. --- linux-4.1.39.orig/kernel/time/posix-cpu-timers.c 2017-03-13 21:04:36.000000000 +0100
  22355. +++ linux-4.1.39/kernel/time/posix-cpu-timers.c 2017-04-18 17:56:30.641398216 +0200
  22356. @@ -3,6 +3,7 @@
  22357. */
  22358. #include <linux/sched.h>
  22359. +#include <linux/sched/rt.h>
  22360. #include <linux/posix-timers.h>
  22361. #include <linux/errno.h>
  22362. #include <linux/math64.h>
  22363. @@ -626,7 +627,7 @@
  22364. /*
  22365. * Disarm any old timer after extracting its expiry time.
  22366. */
  22367. - WARN_ON_ONCE(!irqs_disabled());
  22368. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  22369. ret = 0;
  22370. old_incr = timer->it.cpu.incr;
  22371. @@ -1048,7 +1049,7 @@
  22372. /*
  22373. * Now re-arm for the new expiry time.
  22374. */
  22375. - WARN_ON_ONCE(!irqs_disabled());
  22376. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  22377. arm_timer(timer);
  22378. unlock_task_sighand(p, &flags);
  22379. @@ -1114,10 +1115,11 @@
  22380. sig = tsk->signal;
  22381. if (sig->cputimer.running) {
  22382. struct task_cputime group_sample;
  22383. + unsigned long flags;
  22384. - raw_spin_lock(&sig->cputimer.lock);
  22385. + raw_spin_lock_irqsave(&sig->cputimer.lock, flags);
  22386. group_sample = sig->cputimer.cputime;
  22387. - raw_spin_unlock(&sig->cputimer.lock);
  22388. + raw_spin_unlock_irqrestore(&sig->cputimer.lock, flags);
  22389. if (task_cputime_expired(&group_sample, &sig->cputime_expires))
  22390. return 1;
  22391. @@ -1131,13 +1133,13 @@
  22392. * already updated our counts. We need to check if any timers fire now.
  22393. * Interrupts are disabled.
  22394. */
  22395. -void run_posix_cpu_timers(struct task_struct *tsk)
  22396. +static void __run_posix_cpu_timers(struct task_struct *tsk)
  22397. {
  22398. LIST_HEAD(firing);
  22399. struct k_itimer *timer, *next;
  22400. unsigned long flags;
  22401. - WARN_ON_ONCE(!irqs_disabled());
  22402. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  22403. /*
  22404. * The fast path checks that there are no expired thread or thread
  22405. @@ -1195,6 +1197,190 @@
  22406. }
  22407. }
  22408. +#ifdef CONFIG_PREEMPT_RT_BASE
  22409. +#include <linux/kthread.h>
  22410. +#include <linux/cpu.h>
  22411. +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
  22412. +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
  22413. +
  22414. +static int posix_cpu_timers_thread(void *data)
  22415. +{
  22416. + int cpu = (long)data;
  22417. +
  22418. + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
  22419. +
  22420. + while (!kthread_should_stop()) {
  22421. + struct task_struct *tsk = NULL;
  22422. + struct task_struct *next = NULL;
  22423. +
  22424. + if (cpu_is_offline(cpu))
  22425. + goto wait_to_die;
  22426. +
  22427. + /* grab task list */
  22428. + raw_local_irq_disable();
  22429. + tsk = per_cpu(posix_timer_tasklist, cpu);
  22430. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  22431. + raw_local_irq_enable();
  22432. +
22433. + /* it's possible the list is empty, just return */
  22434. + if (!tsk) {
  22435. + set_current_state(TASK_INTERRUPTIBLE);
  22436. + schedule();
  22437. + __set_current_state(TASK_RUNNING);
  22438. + continue;
  22439. + }
  22440. +
  22441. + /* Process task list */
  22442. + while (1) {
  22443. + /* save next */
  22444. + next = tsk->posix_timer_list;
  22445. +
22446. + /* run the task's timers, clear its list pointer and
22447. + * drop our reference to it
22448. + */
  22449. + __run_posix_cpu_timers(tsk);
  22450. + tsk->posix_timer_list = NULL;
  22451. + put_task_struct(tsk);
  22452. +
  22453. + /* check if this is the last on the list */
  22454. + if (next == tsk)
  22455. + break;
  22456. + tsk = next;
  22457. + }
  22458. + }
  22459. + return 0;
  22460. +
  22461. +wait_to_die:
  22462. + /* Wait for kthread_stop */
  22463. + set_current_state(TASK_INTERRUPTIBLE);
  22464. + while (!kthread_should_stop()) {
  22465. + schedule();
  22466. + set_current_state(TASK_INTERRUPTIBLE);
  22467. + }
  22468. + __set_current_state(TASK_RUNNING);
  22469. + return 0;
  22470. +}
  22471. +
  22472. +static inline int __fastpath_timer_check(struct task_struct *tsk)
  22473. +{
  22474. + /* tsk == current, ensure it is safe to use ->signal/sighand */
  22475. + if (unlikely(tsk->exit_state))
  22476. + return 0;
  22477. +
  22478. + if (!task_cputime_zero(&tsk->cputime_expires))
  22479. + return 1;
  22480. +
  22481. + if (!task_cputime_zero(&tsk->signal->cputime_expires))
  22482. + return 1;
  22483. +
  22484. + return 0;
  22485. +}
  22486. +
  22487. +void run_posix_cpu_timers(struct task_struct *tsk)
  22488. +{
  22489. + unsigned long cpu = smp_processor_id();
  22490. + struct task_struct *tasklist;
  22491. +
  22492. + BUG_ON(!irqs_disabled());
  22493. + if(!per_cpu(posix_timer_task, cpu))
  22494. + return;
  22495. + /* get per-cpu references */
  22496. + tasklist = per_cpu(posix_timer_tasklist, cpu);
  22497. +
  22498. + /* check to see if we're already queued */
  22499. + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
  22500. + get_task_struct(tsk);
  22501. + if (tasklist) {
  22502. + tsk->posix_timer_list = tasklist;
  22503. + } else {
  22504. + /*
  22505. + * The list is terminated by a self-pointing
  22506. + * task_struct
  22507. + */
  22508. + tsk->posix_timer_list = tsk;
  22509. + }
  22510. + per_cpu(posix_timer_tasklist, cpu) = tsk;
  22511. +
  22512. + wake_up_process(per_cpu(posix_timer_task, cpu));
  22513. + }
  22514. +}
  22515. +
  22516. +/*
22517. + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
22518. + * Here we can start up the per-CPU posix CPU timer thread for the new CPU.
  22519. + */
  22520. +static int posix_cpu_thread_call(struct notifier_block *nfb,
  22521. + unsigned long action, void *hcpu)
  22522. +{
  22523. + int cpu = (long)hcpu;
  22524. + struct task_struct *p;
  22525. + struct sched_param param;
  22526. +
  22527. + switch (action) {
  22528. + case CPU_UP_PREPARE:
  22529. + p = kthread_create(posix_cpu_timers_thread, hcpu,
  22530. + "posixcputmr/%d",cpu);
  22531. + if (IS_ERR(p))
  22532. + return NOTIFY_BAD;
  22533. + p->flags |= PF_NOFREEZE;
  22534. + kthread_bind(p, cpu);
  22535. + /* Must be high prio to avoid getting starved */
  22536. + param.sched_priority = MAX_RT_PRIO-1;
  22537. + sched_setscheduler(p, SCHED_FIFO, &param);
  22538. + per_cpu(posix_timer_task,cpu) = p;
  22539. + break;
  22540. + case CPU_ONLINE:
22541. + /* Strictly unnecessary, as the first user will wake it. */
  22542. + wake_up_process(per_cpu(posix_timer_task,cpu));
  22543. + break;
  22544. +#ifdef CONFIG_HOTPLUG_CPU
  22545. + case CPU_UP_CANCELED:
  22546. + /* Unbind it from offline cpu so it can run. Fall thru. */
  22547. + kthread_bind(per_cpu(posix_timer_task, cpu),
  22548. + cpumask_any(cpu_online_mask));
  22549. + kthread_stop(per_cpu(posix_timer_task,cpu));
  22550. + per_cpu(posix_timer_task,cpu) = NULL;
  22551. + break;
  22552. + case CPU_DEAD:
  22553. + kthread_stop(per_cpu(posix_timer_task,cpu));
  22554. + per_cpu(posix_timer_task,cpu) = NULL;
  22555. + break;
  22556. +#endif
  22557. + }
  22558. + return NOTIFY_OK;
  22559. +}
  22560. +
22561. +/* Register at elevated priority so that the per-CPU timer threads are
22562. + * created before anything else needs to queue work on them.
22563. + */
  22564. +static struct notifier_block posix_cpu_thread_notifier = {
  22565. + .notifier_call = posix_cpu_thread_call,
  22566. + .priority = 10
  22567. +};
  22568. +
  22569. +static int __init posix_cpu_thread_init(void)
  22570. +{
22571. + /* Start one thread for the boot CPU. */
22572. + void *hcpu = (void *)(long)smp_processor_id();
22573. + unsigned long cpu;
  22574. +
  22575. + /* init the per-cpu posix_timer_tasklets */
  22576. + for_each_possible_cpu(cpu)
  22577. + per_cpu(posix_timer_tasklist, cpu) = NULL;
  22578. +
  22579. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
  22580. + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
  22581. + register_cpu_notifier(&posix_cpu_thread_notifier);
  22582. + return 0;
  22583. +}
  22584. +early_initcall(posix_cpu_thread_init);
  22585. +#else /* CONFIG_PREEMPT_RT_BASE */
  22586. +void run_posix_cpu_timers(struct task_struct *tsk)
  22587. +{
  22588. + __run_posix_cpu_timers(tsk);
  22589. +}
  22590. +#endif /* CONFIG_PREEMPT_RT_BASE */
  22591. +
  22592. /*
  22593. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  22594. * The tsk->sighand->siglock must be held by the caller.
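The RT path above hands expired timers to a per-CPU kthread through a singly linked list whose last entry points at itself, so a NULL posix_timer_list pointer can keep meaning "not queued". Below is a minimal user-space sketch of that self-terminating-list idiom; the node type and names are illustrative only, not taken from the patch.

#include <stddef.h>
#include <stdio.h>

/* Self-terminating singly linked list: NULL means "not queued",
 * a node whose next pointer points at itself marks the end. */
struct node {
	struct node *next;
	int id;
};

static struct node *list_head;	/* stands in for per_cpu(posix_timer_tasklist) */

static void enqueue(struct node *n)
{
	if (n->next)				/* already queued */
		return;
	n->next = list_head ? list_head : n;	/* tail entry points at itself */
	list_head = n;
}

static void drain(void)
{
	struct node *n = list_head;

	list_head = NULL;			/* detach the whole list */
	while (n) {
		struct node *next = n->next;

		printf("processing node %d\n", n->id);
		n->next = NULL;			/* mark as no longer queued */
		if (next == n)			/* self-pointer: end of list */
			break;
		n = next;
	}
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 };

	enqueue(&a);
	enqueue(&b);
	drain();				/* prints node 2, then node 1 */
	return 0;
}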
  22595. diff -Nur linux-4.1.39.orig/kernel/time/posix-timers.c linux-4.1.39/kernel/time/posix-timers.c
  22596. --- linux-4.1.39.orig/kernel/time/posix-timers.c 2017-03-13 21:04:36.000000000 +0100
  22597. +++ linux-4.1.39/kernel/time/posix-timers.c 2017-04-18 17:56:30.641398216 +0200
  22598. @@ -499,6 +499,7 @@
  22599. static struct pid *good_sigevent(sigevent_t * event)
  22600. {
  22601. struct task_struct *rtn = current->group_leader;
  22602. + int sig = event->sigev_signo;
  22603. if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
  22604. (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
  22605. @@ -507,7 +508,8 @@
  22606. return NULL;
  22607. if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
  22608. - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
  22609. + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
  22610. + sig_kernel_coredump(sig)))
  22611. return NULL;
  22612. return task_pid(rtn);
  22613. @@ -819,6 +821,20 @@
  22614. return overrun;
  22615. }
  22616. +/*
  22617. + * Protected by RCU!
  22618. + */
  22619. +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
  22620. +{
  22621. +#ifdef CONFIG_PREEMPT_RT_FULL
  22622. + if (kc->timer_set == common_timer_set)
  22623. + hrtimer_wait_for_timer(&timr->it.real.timer);
  22624. + else
  22625. + /* FIXME: Whacky hack for posix-cpu-timers */
  22626. + schedule_timeout(1);
  22627. +#endif
  22628. +}
  22629. +
  22630. /* Set a POSIX.1b interval timer. */
  22631. /* timr->it_lock is taken. */
  22632. static int
  22633. @@ -896,6 +912,7 @@
  22634. if (!timr)
  22635. return -EINVAL;
  22636. + rcu_read_lock();
  22637. kc = clockid_to_kclock(timr->it_clock);
  22638. if (WARN_ON_ONCE(!kc || !kc->timer_set))
  22639. error = -EINVAL;
  22640. @@ -904,9 +921,12 @@
  22641. unlock_timer(timr, flag);
  22642. if (error == TIMER_RETRY) {
  22643. + timer_wait_for_callback(kc, timr);
  22644. rtn = NULL; // We already got the old time...
  22645. + rcu_read_unlock();
  22646. goto retry;
  22647. }
  22648. + rcu_read_unlock();
  22649. if (old_setting && !error &&
  22650. copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
  22651. @@ -944,10 +964,15 @@
  22652. if (!timer)
  22653. return -EINVAL;
  22654. + rcu_read_lock();
  22655. if (timer_delete_hook(timer) == TIMER_RETRY) {
  22656. unlock_timer(timer, flags);
  22657. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  22658. + timer);
  22659. + rcu_read_unlock();
  22660. goto retry_delete;
  22661. }
  22662. + rcu_read_unlock();
  22663. spin_lock(&current->sighand->siglock);
  22664. list_del(&timer->list);
  22665. @@ -973,8 +998,18 @@
  22666. retry_delete:
  22667. spin_lock_irqsave(&timer->it_lock, flags);
  22668. + /* On RT we can race with a deletion */
  22669. + if (!timer->it_signal) {
  22670. + unlock_timer(timer, flags);
  22671. + return;
  22672. + }
  22673. +
  22674. if (timer_delete_hook(timer) == TIMER_RETRY) {
  22675. + rcu_read_lock();
  22676. unlock_timer(timer, flags);
  22677. + timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
  22678. + timer);
  22679. + rcu_read_unlock();
  22680. goto retry_delete;
  22681. }
  22682. list_del(&timer->list);
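The posix-timers.c hunks above follow a recurring RT pattern: if disarming fails with TIMER_RETRY because the callback is still running, drop the lock, wait for the callback to finish, and then retry rather than busy-looping. A user-space sketch of that retry-then-wait shape, using pthreads; the toy_timer type and its fields are purely illustrative.

#include <pthread.h>
#include <stdbool.h>

/* Illustrative timer object: 'running' is set while the callback executes.
 * The callback thread is assumed to clear 'running' and broadcast 'idle'
 * with 'lock' held. */
struct toy_timer {
	pthread_mutex_t lock;
	pthread_cond_t  idle;
	bool running;
	bool armed;
};

/* Returns false ("retry") if the callback is currently executing. */
static bool try_disarm(struct toy_timer *t)
{
	if (t->running)
		return false;
	t->armed = false;
	return true;
}

/* Cancel the timer without spinning: wait for a running callback instead. */
static void cancel_sync(struct toy_timer *t)
{
	pthread_mutex_lock(&t->lock);
	while (!try_disarm(t)) {
		/* Analogue of timer_wait_for_callback(): sleep until the
		 * callback signals completion, then retry the disarm. */
		while (t->running)
			pthread_cond_wait(&t->idle, &t->lock);
	}
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	static struct toy_timer t = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
		false, true
	};

	cancel_sync(&t);	/* no callback running, disarms immediately */
	return 0;
}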
  22683. diff -Nur linux-4.1.39.orig/kernel/time/tick-broadcast-hrtimer.c linux-4.1.39/kernel/time/tick-broadcast-hrtimer.c
  22684. --- linux-4.1.39.orig/kernel/time/tick-broadcast-hrtimer.c 2017-03-13 21:04:36.000000000 +0100
  22685. +++ linux-4.1.39/kernel/time/tick-broadcast-hrtimer.c 2017-04-18 17:56:30.641398216 +0200
  22686. @@ -109,5 +109,6 @@
  22687. {
  22688. hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  22689. bctimer.function = bc_handler;
  22690. + bctimer.irqsafe = true;
  22691. clockevents_register_device(&ce_broadcast_hrtimer);
  22692. }
  22693. diff -Nur linux-4.1.39.orig/kernel/time/tick-common.c linux-4.1.39/kernel/time/tick-common.c
  22694. --- linux-4.1.39.orig/kernel/time/tick-common.c 2017-03-13 21:04:36.000000000 +0100
  22695. +++ linux-4.1.39/kernel/time/tick-common.c 2017-04-18 17:56:30.641398216 +0200
  22696. @@ -78,13 +78,15 @@
  22697. static void tick_periodic(int cpu)
  22698. {
  22699. if (tick_do_timer_cpu == cpu) {
  22700. - write_seqlock(&jiffies_lock);
  22701. + raw_spin_lock(&jiffies_lock);
  22702. + write_seqcount_begin(&jiffies_seq);
  22703. /* Keep track of the next tick event */
  22704. tick_next_period = ktime_add(tick_next_period, tick_period);
  22705. do_timer(1);
  22706. - write_sequnlock(&jiffies_lock);
  22707. + write_seqcount_end(&jiffies_seq);
  22708. + raw_spin_unlock(&jiffies_lock);
  22709. update_wall_time();
  22710. }
  22711. @@ -146,9 +148,9 @@
  22712. ktime_t next;
  22713. do {
  22714. - seq = read_seqbegin(&jiffies_lock);
  22715. + seq = read_seqcount_begin(&jiffies_seq);
  22716. next = tick_next_period;
  22717. - } while (read_seqretry(&jiffies_lock, seq));
  22718. + } while (read_seqcount_retry(&jiffies_seq, seq));
  22719. clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
  22720. diff -Nur linux-4.1.39.orig/kernel/time/tick-sched.c linux-4.1.39/kernel/time/tick-sched.c
  22721. --- linux-4.1.39.orig/kernel/time/tick-sched.c 2017-03-13 21:04:36.000000000 +0100
  22722. +++ linux-4.1.39/kernel/time/tick-sched.c 2017-04-18 17:56:30.641398216 +0200
  22723. @@ -62,7 +62,8 @@
  22724. return;
  22725. /* Reevalute with jiffies_lock held */
  22726. - write_seqlock(&jiffies_lock);
  22727. + raw_spin_lock(&jiffies_lock);
  22728. + write_seqcount_begin(&jiffies_seq);
  22729. delta = ktime_sub(now, last_jiffies_update);
  22730. if (delta.tv64 >= tick_period.tv64) {
  22731. @@ -85,10 +86,12 @@
  22732. /* Keep the tick_next_period variable up to date */
  22733. tick_next_period = ktime_add(last_jiffies_update, tick_period);
  22734. } else {
  22735. - write_sequnlock(&jiffies_lock);
  22736. + write_seqcount_end(&jiffies_seq);
  22737. + raw_spin_unlock(&jiffies_lock);
  22738. return;
  22739. }
  22740. - write_sequnlock(&jiffies_lock);
  22741. + write_seqcount_end(&jiffies_seq);
  22742. + raw_spin_unlock(&jiffies_lock);
  22743. update_wall_time();
  22744. }
  22745. @@ -99,12 +102,14 @@
  22746. {
  22747. ktime_t period;
  22748. - write_seqlock(&jiffies_lock);
  22749. + raw_spin_lock(&jiffies_lock);
  22750. + write_seqcount_begin(&jiffies_seq);
  22751. /* Did we start the jiffies update yet ? */
  22752. if (last_jiffies_update.tv64 == 0)
  22753. last_jiffies_update = tick_next_period;
  22754. period = last_jiffies_update;
  22755. - write_sequnlock(&jiffies_lock);
  22756. + write_seqcount_end(&jiffies_seq);
  22757. + raw_spin_unlock(&jiffies_lock);
  22758. return period;
  22759. }
  22760. @@ -176,6 +181,11 @@
  22761. return false;
  22762. }
  22763. + if (!arch_irq_work_has_interrupt()) {
  22764. + trace_tick_stop(0, "missing irq work interrupt\n");
  22765. + return false;
  22766. + }
  22767. +
  22768. /* sched_clock_tick() needs us? */
  22769. #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
  22770. /*
  22771. @@ -222,6 +232,7 @@
  22772. static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  22773. .func = nohz_full_kick_work_func,
  22774. + .flags = IRQ_WORK_HARD_IRQ,
  22775. };
  22776. /*
  22777. @@ -578,10 +589,10 @@
  22778. /* Read jiffies and the time when jiffies were updated last */
  22779. do {
  22780. - seq = read_seqbegin(&jiffies_lock);
  22781. + seq = read_seqcount_begin(&jiffies_seq);
  22782. last_update = last_jiffies_update;
  22783. last_jiffies = jiffies;
  22784. - } while (read_seqretry(&jiffies_lock, seq));
  22785. + } while (read_seqcount_retry(&jiffies_seq, seq));
  22786. if (rcu_needs_cpu(&rcu_delta_jiffies) ||
  22787. arch_needs_cpu() || irq_work_needs_cpu()) {
  22788. @@ -759,14 +770,7 @@
  22789. return false;
  22790. if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
  22791. - static int ratelimit;
  22792. -
  22793. - if (ratelimit < 10 &&
  22794. - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
  22795. - pr_warn("NOHZ: local_softirq_pending %02x\n",
  22796. - (unsigned int) local_softirq_pending());
  22797. - ratelimit++;
  22798. - }
  22799. + softirq_check_pending_idle();
  22800. return false;
  22801. }
  22802. @@ -1154,6 +1158,7 @@
  22803. * Emulate tick processing via per-CPU hrtimers:
  22804. */
  22805. hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  22806. + ts->sched_timer.irqsafe = 1;
  22807. ts->sched_timer.function = tick_sched_timer;
  22808. /* Get the next period (per cpu) */
  22809. diff -Nur linux-4.1.39.orig/kernel/time/timekeeping.c linux-4.1.39/kernel/time/timekeeping.c
  22810. --- linux-4.1.39.orig/kernel/time/timekeeping.c 2017-03-13 21:04:36.000000000 +0100
  22811. +++ linux-4.1.39/kernel/time/timekeeping.c 2017-04-18 17:56:30.641398216 +0200
  22812. @@ -2087,8 +2087,10 @@
  22813. */
  22814. void xtime_update(unsigned long ticks)
  22815. {
  22816. - write_seqlock(&jiffies_lock);
  22817. + raw_spin_lock(&jiffies_lock);
  22818. + write_seqcount_begin(&jiffies_seq);
  22819. do_timer(ticks);
  22820. - write_sequnlock(&jiffies_lock);
  22821. + write_seqcount_end(&jiffies_seq);
  22822. + raw_spin_unlock(&jiffies_lock);
  22823. update_wall_time();
  22824. }
  22825. diff -Nur linux-4.1.39.orig/kernel/time/timekeeping.h linux-4.1.39/kernel/time/timekeeping.h
  22826. --- linux-4.1.39.orig/kernel/time/timekeeping.h 2017-03-13 21:04:36.000000000 +0100
  22827. +++ linux-4.1.39/kernel/time/timekeeping.h 2017-04-18 17:56:30.641398216 +0200
  22828. @@ -22,7 +22,8 @@
  22829. extern void do_timer(unsigned long ticks);
  22830. extern void update_wall_time(void);
  22831. -extern seqlock_t jiffies_lock;
  22832. +extern raw_spinlock_t jiffies_lock;
  22833. +extern seqcount_t jiffies_seq;
  22834. #define CS_NAME_LEN 32
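Across the tick and timekeeping hunks above, the jiffies seqlock is split into a raw spinlock for writer serialization plus a bare seqcount for readers, so readers keep their lockless retry loop while the writer-side lock remains a non-sleeping raw lock on RT. A stand-alone sketch of that reader/writer pairing, using plain C11 atomics and a pthread mutex in place of the kernel primitives.

#include <pthread.h>
#include <stdatomic.h>

/* Writer-side lock (stands in for the raw spinlock) plus a sequence
 * counter (stands in for the seqcount); readers never take the lock. */
static pthread_mutex_t jlock = PTHREAD_MUTEX_INITIALIZER;
static atomic_uint jseq;
static _Atomic unsigned long long jiffies_val;

static void jiffies_update(void)
{
	pthread_mutex_lock(&jlock);	/* serialize writers */
	atomic_fetch_add(&jseq, 1);	/* odd: update in progress */
	atomic_fetch_add(&jiffies_val, 1);
	atomic_fetch_add(&jseq, 1);	/* even again: update complete */
	pthread_mutex_unlock(&jlock);
}

static unsigned long long jiffies_read(void)
{
	unsigned int seq;
	unsigned long long val;

	do {				/* lockless retry loop, like read_seqcount_* */
		seq = atomic_load(&jseq);
		if (seq & 1)
			continue;	/* writer active, try again */
		val = atomic_load(&jiffies_val);
	} while (atomic_load(&jseq) != seq);
	return val;
}

int main(void)
{
	jiffies_update();
	return (int)jiffies_read();	/* returns 1 */
}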
  22835. diff -Nur linux-4.1.39.orig/kernel/time/timer.c linux-4.1.39/kernel/time/timer.c
  22836. --- linux-4.1.39.orig/kernel/time/timer.c 2017-03-13 21:04:36.000000000 +0100
  22837. +++ linux-4.1.39/kernel/time/timer.c 2017-04-18 17:56:30.641398216 +0200
  22838. @@ -78,6 +78,9 @@
  22839. struct tvec_base {
  22840. spinlock_t lock;
  22841. struct timer_list *running_timer;
  22842. +#ifdef CONFIG_PREEMPT_RT_FULL
  22843. + wait_queue_head_t wait_for_running_timer;
  22844. +#endif
  22845. unsigned long timer_jiffies;
  22846. unsigned long next_timer;
  22847. unsigned long active_timers;
  22848. @@ -768,6 +771,36 @@
  22849. }
  22850. }
  22851. +#ifndef CONFIG_PREEMPT_RT_FULL
  22852. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  22853. + struct tvec_base *old,
  22854. + struct tvec_base *new)
  22855. +{
  22856. + /* See the comment in lock_timer_base() */
  22857. + timer_set_base(timer, NULL);
  22858. + spin_unlock(&old->lock);
  22859. + spin_lock(&new->lock);
  22860. + timer_set_base(timer, new);
  22861. + return new;
  22862. +}
  22863. +#else
  22864. +static inline struct tvec_base *switch_timer_base(struct timer_list *timer,
  22865. + struct tvec_base *old,
  22866. + struct tvec_base *new)
  22867. +{
  22868. + /*
  22869. + * We cannot do the above because we might be preempted and
  22870. + * then the preempter would see NULL and loop forever.
  22871. + */
  22872. + if (spin_trylock(&new->lock)) {
  22873. + timer_set_base(timer, new);
  22874. + spin_unlock(&old->lock);
  22875. + return new;
  22876. + }
  22877. + return old;
  22878. +}
  22879. +#endif
  22880. +
  22881. static inline int
  22882. __mod_timer(struct timer_list *timer, unsigned long expires,
  22883. bool pending_only, int pinned)
  22884. @@ -798,14 +831,8 @@
  22885. * handler yet has not finished. This also guarantees that
  22886. * the timer is serialized wrt itself.
  22887. */
  22888. - if (likely(base->running_timer != timer)) {
  22889. - /* See the comment in lock_timer_base() */
  22890. - timer_set_base(timer, NULL);
  22891. - spin_unlock(&base->lock);
  22892. - base = new_base;
  22893. - spin_lock(&base->lock);
  22894. - timer_set_base(timer, base);
  22895. - }
  22896. + if (likely(base->running_timer != timer))
  22897. + base = switch_timer_base(timer, base, new_base);
  22898. }
  22899. timer->expires = expires;
  22900. @@ -979,6 +1006,29 @@
  22901. }
  22902. EXPORT_SYMBOL_GPL(add_timer_on);
  22903. +#ifdef CONFIG_PREEMPT_RT_FULL
  22904. +/*
  22905. + * Wait for a running timer
  22906. + */
  22907. +static void wait_for_running_timer(struct timer_list *timer)
  22908. +{
  22909. + struct tvec_base *base = timer->base;
  22910. +
  22911. + if (base->running_timer == timer)
  22912. + wait_event(base->wait_for_running_timer,
  22913. + base->running_timer != timer);
  22914. +}
  22915. +
  22916. +# define wakeup_timer_waiters(b) wake_up_all(&(b)->wait_for_running_timer)
  22917. +#else
  22918. +static inline void wait_for_running_timer(struct timer_list *timer)
  22919. +{
  22920. + cpu_relax();
  22921. +}
  22922. +
  22923. +# define wakeup_timer_waiters(b) do { } while (0)
  22924. +#endif
  22925. +
  22926. /**
  22927. * del_timer - deactive a timer.
  22928. * @timer: the timer to be deactivated
  22929. @@ -1036,7 +1086,7 @@
  22930. }
  22931. EXPORT_SYMBOL(try_to_del_timer_sync);
  22932. -#ifdef CONFIG_SMP
  22933. +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
  22934. static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
  22935. /**
  22936. @@ -1098,7 +1148,7 @@
  22937. int ret = try_to_del_timer_sync(timer);
  22938. if (ret >= 0)
  22939. return ret;
  22940. - cpu_relax();
  22941. + wait_for_running_timer(timer);
  22942. }
  22943. }
  22944. EXPORT_SYMBOL(del_timer_sync);
  22945. @@ -1219,16 +1269,18 @@
  22946. if (irqsafe) {
  22947. spin_unlock(&base->lock);
  22948. call_timer_fn(timer, fn, data);
  22949. + base->running_timer = NULL;
  22950. spin_lock(&base->lock);
  22951. } else {
  22952. spin_unlock_irq(&base->lock);
  22953. call_timer_fn(timer, fn, data);
  22954. + base->running_timer = NULL;
  22955. spin_lock_irq(&base->lock);
  22956. }
  22957. }
  22958. }
  22959. - base->running_timer = NULL;
  22960. spin_unlock_irq(&base->lock);
  22961. + wakeup_timer_waiters(base);
  22962. }
  22963. #ifdef CONFIG_NO_HZ_COMMON
  22964. @@ -1367,6 +1419,14 @@
  22965. if (cpu_is_offline(smp_processor_id()))
  22966. return expires;
  22967. +#ifdef CONFIG_PREEMPT_RT_FULL
  22968. + /*
  22969. + * On PREEMPT_RT we cannot sleep here. As a result we can't take
  22970. + * the base lock to check when the next timer is pending and so
  22971. + * we assume the next jiffy.
  22972. + */
  22973. + return now + 1;
  22974. +#endif
  22975. spin_lock(&base->lock);
  22976. if (base->active_timers) {
  22977. if (time_before_eq(base->next_timer, base->timer_jiffies))
  22978. @@ -1392,13 +1452,13 @@
  22979. /* Note: this timer irq context must be accounted for as well. */
  22980. account_process_tick(p, user_tick);
  22981. + scheduler_tick();
  22982. run_local_timers();
  22983. rcu_check_callbacks(user_tick);
  22984. -#ifdef CONFIG_IRQ_WORK
  22985. +#if defined(CONFIG_IRQ_WORK)
  22986. if (in_irq())
  22987. irq_work_tick();
  22988. #endif
  22989. - scheduler_tick();
  22990. run_posix_cpu_timers(p);
  22991. }
  22992. @@ -1411,6 +1471,8 @@
  22993. hrtimer_run_pending();
  22994. + irq_work_tick_soft();
  22995. +
  22996. if (time_after_eq(jiffies, base->timer_jiffies))
  22997. __run_timers(base);
  22998. }
  22999. @@ -1566,7 +1628,7 @@
  23000. BUG_ON(cpu_online(cpu));
  23001. old_base = per_cpu(tvec_bases, cpu);
  23002. - new_base = get_cpu_var(tvec_bases);
  23003. + new_base = get_local_var(tvec_bases);
  23004. /*
  23005. * The caller is globally serialized and nobody else
  23006. * takes two locks at once, deadlock is not possible.
  23007. @@ -1590,7 +1652,7 @@
  23008. spin_unlock(&old_base->lock);
  23009. spin_unlock_irq(&new_base->lock);
  23010. - put_cpu_var(tvec_bases);
  23011. + put_local_var(tvec_bases);
  23012. }
  23013. static int timer_cpu_notify(struct notifier_block *self,
  23014. @@ -1625,6 +1687,9 @@
  23015. base->cpu = cpu;
  23016. per_cpu(tvec_bases, cpu) = base;
  23017. spin_lock_init(&base->lock);
  23018. +#ifdef CONFIG_PREEMPT_RT_FULL
  23019. + init_waitqueue_head(&base->wait_for_running_timer);
  23020. +#endif
  23021. for (j = 0; j < TVN_SIZE; j++) {
  23022. INIT_LIST_HEAD(base->tv5.vec + j);
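On RT the timer above is never detached from its base (the base pointer is never set to NULL) while migrating, because a preempting __mod_timer() could then spin forever on the NULL base; instead the new base is trylocked and the timer simply stays on the old base if that fails. Below is a stand-alone sketch of that "migrate only if the new lock is free" decision; the struct and function names are illustrative, not the kernel's.

#include <pthread.h>

struct base {
	pthread_mutex_t lock;
	/* ... per-base timer wheel state would live here ... */
};

/*
 * Called with old->lock held.  Returns whichever base is locked on exit:
 * the new base if its lock could be taken without blocking, otherwise the
 * old one.  The timer's base pointer is never left invalid in between.
 */
static struct base *switch_base(struct base **timer_base,
				struct base *old, struct base *new)
{
	if (pthread_mutex_trylock(&new->lock) == 0) {
		*timer_base = new;		/* re-home the timer */
		pthread_mutex_unlock(&old->lock);
		return new;
	}
	return old;				/* keep the old base; retry later */
}

int main(void)
{
	static struct base old_base = { PTHREAD_MUTEX_INITIALIZER };
	static struct base new_base = { PTHREAD_MUTEX_INITIALIZER };
	struct base *timer_base = &old_base, *locked;

	pthread_mutex_lock(&old_base.lock);	/* caller holds the old base lock */
	locked = switch_base(&timer_base, &old_base, &new_base);
	pthread_mutex_unlock(&locked->lock);	/* whichever base came back locked */
	return 0;
}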
  23023. diff -Nur linux-4.1.39.orig/kernel/trace/Kconfig linux-4.1.39/kernel/trace/Kconfig
  23024. --- linux-4.1.39.orig/kernel/trace/Kconfig 2017-03-13 21:04:36.000000000 +0100
  23025. +++ linux-4.1.39/kernel/trace/Kconfig 2017-04-18 17:56:30.641398216 +0200
  23026. @@ -187,6 +187,24 @@
  23027. enabled. This option and the preempt-off timing option can be
  23028. used together or separately.)
  23029. +config INTERRUPT_OFF_HIST
  23030. + bool "Interrupts-off Latency Histogram"
  23031. + depends on IRQSOFF_TRACER
  23032. + help
  23033. + This option generates continuously updated histograms (one per cpu)
  23034. + of the duration of time periods with interrupts disabled. The
  23035. + histograms are disabled by default. To enable them, write a non-zero
  23036. + number to
  23037. +
  23038. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  23039. +
  23040. + If PREEMPT_OFF_HIST is also selected, additional histograms (one
  23041. + per cpu) are generated that accumulate the duration of time periods
  23042. + when both interrupts and preemption are disabled. The histogram data
  23043. + will be located in the debug file system at
  23044. +
  23045. + /sys/kernel/debug/tracing/latency_hist/irqsoff
  23046. +
  23047. config PREEMPT_TRACER
  23048. bool "Preemption-off Latency Tracer"
  23049. default n
  23050. @@ -211,6 +229,24 @@
  23051. enabled. This option and the irqs-off timing option can be
  23052. used together or separately.)
  23053. +config PREEMPT_OFF_HIST
  23054. + bool "Preemption-off Latency Histogram"
  23055. + depends on PREEMPT_TRACER
  23056. + help
  23057. + This option generates continuously updated histograms (one per cpu)
  23058. + of the duration of time periods with preemption disabled. The
  23059. + histograms are disabled by default. To enable them, write a non-zero
  23060. + number to
  23061. +
  23062. + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
  23063. +
  23064. + If INTERRUPT_OFF_HIST is also selected, additional histograms (one
  23065. + per cpu) are generated that accumulate the duration of time periods
  23066. + when both interrupts and preemption are disabled. The histogram data
  23067. + will be located in the debug file system at
  23068. +
  23069. + /sys/kernel/debug/tracing/latency_hist/preemptoff
  23070. +
  23071. config SCHED_TRACER
  23072. bool "Scheduling Latency Tracer"
  23073. select GENERIC_TRACER
  23074. @@ -221,6 +257,74 @@
  23075. This tracer tracks the latency of the highest priority task
  23076. to be scheduled in, starting from the point it has woken up.
  23077. +config WAKEUP_LATENCY_HIST
  23078. + bool "Scheduling Latency Histogram"
  23079. + depends on SCHED_TRACER
  23080. + help
  23081. + This option generates continuously updated histograms (one per cpu)
  23082. + of the scheduling latency of the highest priority task.
  23083. + The histograms are disabled by default. To enable them, write a
  23084. + non-zero number to
  23085. +
  23086. + /sys/kernel/debug/tracing/latency_hist/enable/wakeup
  23087. +
  23088. + Two different algorithms are used, one to determine the latency of
  23089. + processes that exclusively use the highest priority of the system and
  23090. + another one to determine the latency of processes that share the
  23091. + highest system priority with other processes. The former is used to
  23092. + improve hardware and system software, the latter to optimize the
  23093. + priority design of a given system. The histogram data will be
  23094. + located in the debug file system at
  23095. +
  23096. + /sys/kernel/debug/tracing/latency_hist/wakeup
  23097. +
  23098. + and
  23099. +
  23100. + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio
  23101. +
  23102. + If both Scheduling Latency Histogram and Missed Timer Offsets
  23103. + Histogram are selected, additional histogram data will be collected
  23104. + that contain, in addition to the wakeup latency, the timer latency, in
  23105. + case the wakeup was triggered by an expired timer. These histograms
  23106. + are available in the
  23107. +
  23108. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  23109. +
  23110. + directory. They reflect the apparent interrupt and scheduling latency
23111. + and are best suited to determining the worst-case latency of a given
  23112. + system. To enable these histograms, write a non-zero number to
  23113. +
  23114. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  23115. +
  23116. +config MISSED_TIMER_OFFSETS_HIST
  23117. + depends on HIGH_RES_TIMERS
  23118. + select GENERIC_TRACER
  23119. + bool "Missed Timer Offsets Histogram"
  23120. + help
  23121. + Generate a histogram of missed timer offsets in microseconds. The
  23122. + histograms are disabled by default. To enable them, write a non-zero
  23123. + number to
  23124. +
  23125. + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets
  23126. +
  23127. + The histogram data will be located in the debug file system at
  23128. +
  23129. + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets
  23130. +
  23131. + If both Scheduling Latency Histogram and Missed Timer Offsets
  23132. + Histogram are selected, additional histogram data will be collected
  23133. + that contain, in addition to the wakeup latency, the timer latency, in
  23134. + case the wakeup was triggered by an expired timer. These histograms
  23135. + are available in the
  23136. +
  23137. + /sys/kernel/debug/tracing/latency_hist/timerandwakeup
  23138. +
  23139. + directory. They reflect the apparent interrupt and scheduling latency
23140. + and are best suited to determining the worst-case latency of a given
  23141. + system. To enable these histograms, write a non-zero number to
  23142. +
  23143. + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
  23144. +
  23145. config ENABLE_DEFAULT_TRACERS
  23146. bool "Trace process context switches and events"
  23147. depends on !GENERIC_TRACER
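As the help texts above describe, the histograms are armed by writing a non-zero number to a file under latency_hist/enable/ and read back from per-CPU files in the corresponding directory. A minimal C sketch follows; only the directory paths come from the help text, while the per-CPU file name ("CPU0") is an assumption for illustration.

#include <stdio.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff";
	const char *hist =
		"/sys/kernel/debug/tracing/latency_hist/irqsoff/CPU0"; /* assumed per-CPU name */
	char line[256];
	FILE *f;

	/* Arm the interrupts-off/preemption-off histograms. */
	f = fopen(enable, "w");
	if (!f)
		return 1;
	fputs("1\n", f);
	fclose(f);

	/* Dump one per-CPU histogram file as-is. */
	f = fopen(hist, "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}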
  23148. diff -Nur linux-4.1.39.orig/kernel/trace/latency_hist.c linux-4.1.39/kernel/trace/latency_hist.c
  23149. --- linux-4.1.39.orig/kernel/trace/latency_hist.c 1970-01-01 01:00:00.000000000 +0100
  23150. +++ linux-4.1.39/kernel/trace/latency_hist.c 2017-04-18 17:56:30.641398216 +0200
  23151. @@ -0,0 +1,1178 @@
  23152. +/*
  23153. + * kernel/trace/latency_hist.c
  23154. + *
23155. + * Add support for histograms of preemption-off latency,
23156. + * interrupt-off latency, and wakeup latency; it depends on
  23157. + * Real-Time Preemption Support.
  23158. + *
  23159. + * Copyright (C) 2005 MontaVista Software, Inc.
  23160. + * Yi Yang <yyang@ch.mvista.com>
  23161. + *
  23162. + * Converted to work with the new latency tracer.
  23163. + * Copyright (C) 2008 Red Hat, Inc.
  23164. + * Steven Rostedt <srostedt@redhat.com>
  23165. + *
  23166. + */
  23167. +#include <linux/module.h>
  23168. +#include <linux/debugfs.h>
  23169. +#include <linux/seq_file.h>
  23170. +#include <linux/percpu.h>
  23171. +#include <linux/kallsyms.h>
  23172. +#include <linux/uaccess.h>
  23173. +#include <linux/sched.h>
  23174. +#include <linux/sched/rt.h>
  23175. +#include <linux/slab.h>
  23176. +#include <linux/atomic.h>
  23177. +#include <asm/div64.h>
  23178. +
  23179. +#include "trace.h"
  23180. +#include <trace/events/sched.h>
  23181. +
  23182. +#define NSECS_PER_USECS 1000L
  23183. +
  23184. +#define CREATE_TRACE_POINTS
  23185. +#include <trace/events/hist.h>
  23186. +
  23187. +enum {
  23188. + IRQSOFF_LATENCY = 0,
  23189. + PREEMPTOFF_LATENCY,
  23190. + PREEMPTIRQSOFF_LATENCY,
  23191. + WAKEUP_LATENCY,
  23192. + WAKEUP_LATENCY_SHAREDPRIO,
  23193. + MISSED_TIMER_OFFSETS,
  23194. + TIMERANDWAKEUP_LATENCY,
  23195. + MAX_LATENCY_TYPE,
  23196. +};
  23197. +
  23198. +#define MAX_ENTRY_NUM 10240
  23199. +
  23200. +struct hist_data {
  23201. + atomic_t hist_mode; /* 0 log, 1 don't log */
  23202. + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */
  23203. + long min_lat;
  23204. + long max_lat;
  23205. + unsigned long long below_hist_bound_samples;
  23206. + unsigned long long above_hist_bound_samples;
  23207. + long long accumulate_lat;
  23208. + unsigned long long total_samples;
  23209. + unsigned long long hist_array[MAX_ENTRY_NUM];
  23210. +};
  23211. +
  23212. +struct enable_data {
  23213. + int latency_type;
  23214. + int enabled;
  23215. +};
  23216. +
  23217. +static char *latency_hist_dir_root = "latency_hist";
  23218. +
  23219. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23220. +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist);
  23221. +static char *irqsoff_hist_dir = "irqsoff";
  23222. +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start);
  23223. +static DEFINE_PER_CPU(int, hist_irqsoff_counting);
  23224. +#endif
  23225. +
  23226. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23227. +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist);
  23228. +static char *preemptoff_hist_dir = "preemptoff";
  23229. +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start);
  23230. +static DEFINE_PER_CPU(int, hist_preemptoff_counting);
  23231. +#endif
  23232. +
  23233. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  23234. +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist);
  23235. +static char *preemptirqsoff_hist_dir = "preemptirqsoff";
  23236. +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start);
  23237. +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting);
  23238. +#endif
  23239. +
  23240. +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST)
  23241. +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start);
  23242. +static struct enable_data preemptirqsoff_enabled_data = {
  23243. + .latency_type = PREEMPTIRQSOFF_LATENCY,
  23244. + .enabled = 0,
  23245. +};
  23246. +#endif
  23247. +
  23248. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23249. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23250. +struct maxlatproc_data {
  23251. + char comm[FIELD_SIZEOF(struct task_struct, comm)];
  23252. + char current_comm[FIELD_SIZEOF(struct task_struct, comm)];
  23253. + int pid;
  23254. + int current_pid;
  23255. + int prio;
  23256. + int current_prio;
  23257. + long latency;
  23258. + long timeroffset;
  23259. + cycle_t timestamp;
  23260. +};
  23261. +#endif
  23262. +
  23263. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23264. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist);
  23265. +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio);
  23266. +static char *wakeup_latency_hist_dir = "wakeup";
  23267. +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio";
  23268. +static notrace void probe_wakeup_latency_hist_start(void *v,
  23269. + struct task_struct *p);
  23270. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  23271. + struct task_struct *prev, struct task_struct *next);
  23272. +static notrace void probe_sched_migrate_task(void *,
  23273. + struct task_struct *task, int cpu);
  23274. +static struct enable_data wakeup_latency_enabled_data = {
  23275. + .latency_type = WAKEUP_LATENCY,
  23276. + .enabled = 0,
  23277. +};
  23278. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc);
  23279. +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio);
  23280. +static DEFINE_PER_CPU(struct task_struct *, wakeup_task);
  23281. +static DEFINE_PER_CPU(int, wakeup_sharedprio);
  23282. +static unsigned long wakeup_pid;
  23283. +#endif
  23284. +
  23285. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23286. +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets);
  23287. +static char *missed_timer_offsets_dir = "missed_timer_offsets";
  23288. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  23289. + long long offset, struct task_struct *curr, struct task_struct *task);
  23290. +static struct enable_data missed_timer_offsets_enabled_data = {
  23291. + .latency_type = MISSED_TIMER_OFFSETS,
  23292. + .enabled = 0,
  23293. +};
  23294. +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc);
  23295. +static unsigned long missed_timer_offsets_pid;
  23296. +#endif
  23297. +
  23298. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23299. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23300. +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
  23301. +static char *timerandwakeup_latency_hist_dir = "timerandwakeup";
  23302. +static struct enable_data timerandwakeup_enabled_data = {
  23303. + .latency_type = TIMERANDWAKEUP_LATENCY,
  23304. + .enabled = 0,
  23305. +};
  23306. +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
  23307. +#endif
  23308. +
  23309. +void notrace latency_hist(int latency_type, int cpu, long latency,
  23310. + long timeroffset, cycle_t stop,
  23311. + struct task_struct *p)
  23312. +{
  23313. + struct hist_data *my_hist;
  23314. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23315. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23316. + struct maxlatproc_data *mp = NULL;
  23317. +#endif
  23318. +
  23319. + if (!cpu_possible(cpu) || latency_type < 0 ||
  23320. + latency_type >= MAX_LATENCY_TYPE)
  23321. + return;
  23322. +
  23323. + switch (latency_type) {
  23324. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23325. + case IRQSOFF_LATENCY:
  23326. + my_hist = &per_cpu(irqsoff_hist, cpu);
  23327. + break;
  23328. +#endif
  23329. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23330. + case PREEMPTOFF_LATENCY:
  23331. + my_hist = &per_cpu(preemptoff_hist, cpu);
  23332. + break;
  23333. +#endif
  23334. +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
  23335. + case PREEMPTIRQSOFF_LATENCY:
  23336. + my_hist = &per_cpu(preemptirqsoff_hist, cpu);
  23337. + break;
  23338. +#endif
  23339. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23340. + case WAKEUP_LATENCY:
  23341. + my_hist = &per_cpu(wakeup_latency_hist, cpu);
  23342. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  23343. + break;
  23344. + case WAKEUP_LATENCY_SHAREDPRIO:
  23345. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  23346. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  23347. + break;
  23348. +#endif
  23349. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23350. + case MISSED_TIMER_OFFSETS:
  23351. + my_hist = &per_cpu(missed_timer_offsets, cpu);
  23352. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  23353. + break;
  23354. +#endif
  23355. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23356. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23357. + case TIMERANDWAKEUP_LATENCY:
  23358. + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  23359. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  23360. + break;
  23361. +#endif
  23362. +
  23363. + default:
  23364. + return;
  23365. + }
  23366. +
  23367. + latency += my_hist->offset;
  23368. +
  23369. + if (atomic_read(&my_hist->hist_mode) == 0)
  23370. + return;
  23371. +
  23372. + if (latency < 0 || latency >= MAX_ENTRY_NUM) {
  23373. + if (latency < 0)
  23374. + my_hist->below_hist_bound_samples++;
  23375. + else
  23376. + my_hist->above_hist_bound_samples++;
  23377. + } else
  23378. + my_hist->hist_array[latency]++;
  23379. +
  23380. + if (unlikely(latency > my_hist->max_lat ||
  23381. + my_hist->min_lat == LONG_MAX)) {
  23382. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23383. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23384. + if (latency_type == WAKEUP_LATENCY ||
  23385. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  23386. + latency_type == MISSED_TIMER_OFFSETS ||
  23387. + latency_type == TIMERANDWAKEUP_LATENCY) {
  23388. + strncpy(mp->comm, p->comm, sizeof(mp->comm));
  23389. + strncpy(mp->current_comm, current->comm,
  23390. + sizeof(mp->current_comm));
  23391. + mp->pid = task_pid_nr(p);
  23392. + mp->current_pid = task_pid_nr(current);
  23393. + mp->prio = p->prio;
  23394. + mp->current_prio = current->prio;
  23395. + mp->latency = latency;
  23396. + mp->timeroffset = timeroffset;
  23397. + mp->timestamp = stop;
  23398. + }
  23399. +#endif
  23400. + my_hist->max_lat = latency;
  23401. + }
  23402. + if (unlikely(latency < my_hist->min_lat))
  23403. + my_hist->min_lat = latency;
  23404. + my_hist->total_samples++;
  23405. + my_hist->accumulate_lat += latency;
  23406. +}
  23407. +
  23408. +static void *l_start(struct seq_file *m, loff_t *pos)
  23409. +{
  23410. + loff_t *index_ptr = NULL;
  23411. + loff_t index = *pos;
  23412. + struct hist_data *my_hist = m->private;
  23413. +
  23414. + if (index == 0) {
  23415. + char minstr[32], avgstr[32], maxstr[32];
  23416. +
  23417. + atomic_dec(&my_hist->hist_mode);
  23418. +
  23419. + if (likely(my_hist->total_samples)) {
  23420. + long avg = (long) div64_s64(my_hist->accumulate_lat,
  23421. + my_hist->total_samples);
  23422. + snprintf(minstr, sizeof(minstr), "%ld",
  23423. + my_hist->min_lat - my_hist->offset);
  23424. + snprintf(avgstr, sizeof(avgstr), "%ld",
  23425. + avg - my_hist->offset);
  23426. + snprintf(maxstr, sizeof(maxstr), "%ld",
  23427. + my_hist->max_lat - my_hist->offset);
  23428. + } else {
  23429. + strcpy(minstr, "<undef>");
  23430. + strcpy(avgstr, minstr);
  23431. + strcpy(maxstr, minstr);
  23432. + }
  23433. +
  23434. + seq_printf(m, "#Minimum latency: %s microseconds\n"
  23435. + "#Average latency: %s microseconds\n"
  23436. + "#Maximum latency: %s microseconds\n"
  23437. + "#Total samples: %llu\n"
  23438. + "#There are %llu samples lower than %ld"
  23439. + " microseconds.\n"
  23440. + "#There are %llu samples greater or equal"
  23441. + " than %ld microseconds.\n"
  23442. + "#usecs\t%16s\n",
  23443. + minstr, avgstr, maxstr,
  23444. + my_hist->total_samples,
  23445. + my_hist->below_hist_bound_samples,
  23446. + -my_hist->offset,
  23447. + my_hist->above_hist_bound_samples,
  23448. + MAX_ENTRY_NUM - my_hist->offset,
  23449. + "samples");
  23450. + }
  23451. + if (index < MAX_ENTRY_NUM) {
  23452. + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
  23453. + if (index_ptr)
  23454. + *index_ptr = index;
  23455. + }
  23456. +
  23457. + return index_ptr;
  23458. +}
  23459. +
  23460. +static void *l_next(struct seq_file *m, void *p, loff_t *pos)
  23461. +{
  23462. + loff_t *index_ptr = p;
  23463. + struct hist_data *my_hist = m->private;
  23464. +
  23465. + if (++*pos >= MAX_ENTRY_NUM) {
  23466. + atomic_inc(&my_hist->hist_mode);
  23467. + return NULL;
  23468. + }
  23469. + *index_ptr = *pos;
  23470. + return index_ptr;
  23471. +}
  23472. +
  23473. +static void l_stop(struct seq_file *m, void *p)
  23474. +{
  23475. + kfree(p);
  23476. +}
  23477. +
  23478. +static int l_show(struct seq_file *m, void *p)
  23479. +{
  23480. + int index = *(loff_t *) p;
  23481. + struct hist_data *my_hist = m->private;
  23482. +
  23483. + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
  23484. + my_hist->hist_array[index]);
  23485. + return 0;
  23486. +}
  23487. +
  23488. +static const struct seq_operations latency_hist_seq_op = {
  23489. + .start = l_start,
  23490. + .next = l_next,
  23491. + .stop = l_stop,
  23492. + .show = l_show
  23493. +};
  23494. +
  23495. +static int latency_hist_open(struct inode *inode, struct file *file)
  23496. +{
  23497. + int ret;
  23498. +
  23499. + ret = seq_open(file, &latency_hist_seq_op);
  23500. + if (!ret) {
  23501. + struct seq_file *seq = file->private_data;
  23502. + seq->private = inode->i_private;
  23503. + }
  23504. + return ret;
  23505. +}
  23506. +
  23507. +static const struct file_operations latency_hist_fops = {
  23508. + .open = latency_hist_open,
  23509. + .read = seq_read,
  23510. + .llseek = seq_lseek,
  23511. + .release = seq_release,
  23512. +};
  23513. +
  23514. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23515. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23516. +static void clear_maxlatprocdata(struct maxlatproc_data *mp)
  23517. +{
  23518. + mp->comm[0] = mp->current_comm[0] = '\0';
  23519. + mp->prio = mp->current_prio = mp->pid = mp->current_pid =
  23520. + mp->latency = mp->timeroffset = -1;
  23521. + mp->timestamp = 0;
  23522. +}
  23523. +#endif
  23524. +
  23525. +static void hist_reset(struct hist_data *hist)
  23526. +{
  23527. + atomic_dec(&hist->hist_mode);
  23528. +
  23529. + memset(hist->hist_array, 0, sizeof(hist->hist_array));
  23530. + hist->below_hist_bound_samples = 0ULL;
  23531. + hist->above_hist_bound_samples = 0ULL;
  23532. + hist->min_lat = LONG_MAX;
  23533. + hist->max_lat = LONG_MIN;
  23534. + hist->total_samples = 0ULL;
  23535. + hist->accumulate_lat = 0LL;
  23536. +
  23537. + atomic_inc(&hist->hist_mode);
  23538. +}
  23539. +
  23540. +static ssize_t
  23541. +latency_hist_reset(struct file *file, const char __user *a,
  23542. + size_t size, loff_t *off)
  23543. +{
  23544. + int cpu;
  23545. + struct hist_data *hist = NULL;
  23546. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23547. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23548. + struct maxlatproc_data *mp = NULL;
  23549. +#endif
  23550. + off_t latency_type = (off_t) file->private_data;
  23551. +
  23552. + for_each_online_cpu(cpu) {
  23553. +
  23554. + switch (latency_type) {
  23555. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23556. + case PREEMPTOFF_LATENCY:
  23557. + hist = &per_cpu(preemptoff_hist, cpu);
  23558. + break;
  23559. +#endif
  23560. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23561. + case IRQSOFF_LATENCY:
  23562. + hist = &per_cpu(irqsoff_hist, cpu);
  23563. + break;
  23564. +#endif
  23565. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23566. + case PREEMPTIRQSOFF_LATENCY:
  23567. + hist = &per_cpu(preemptirqsoff_hist, cpu);
  23568. + break;
  23569. +#endif
  23570. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23571. + case WAKEUP_LATENCY:
  23572. + hist = &per_cpu(wakeup_latency_hist, cpu);
  23573. + mp = &per_cpu(wakeup_maxlatproc, cpu);
  23574. + break;
  23575. + case WAKEUP_LATENCY_SHAREDPRIO:
  23576. + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
  23577. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
  23578. + break;
  23579. +#endif
  23580. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23581. + case MISSED_TIMER_OFFSETS:
  23582. + hist = &per_cpu(missed_timer_offsets, cpu);
  23583. + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
  23584. + break;
  23585. +#endif
  23586. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23587. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23588. + case TIMERANDWAKEUP_LATENCY:
  23589. + hist = &per_cpu(timerandwakeup_latency_hist, cpu);
  23590. + mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
  23591. + break;
  23592. +#endif
  23593. + }
  23594. +
  23595. + hist_reset(hist);
  23596. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23597. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23598. + if (latency_type == WAKEUP_LATENCY ||
  23599. + latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
  23600. + latency_type == MISSED_TIMER_OFFSETS ||
  23601. + latency_type == TIMERANDWAKEUP_LATENCY)
  23602. + clear_maxlatprocdata(mp);
  23603. +#endif
  23604. + }
  23605. +
  23606. + return size;
  23607. +}
  23608. +
  23609. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23610. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23611. +static ssize_t
  23612. +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  23613. +{
  23614. + char buf[64];
  23615. + int r;
  23616. + unsigned long *this_pid = file->private_data;
  23617. +
  23618. + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
  23619. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  23620. +}
  23621. +
  23622. +static ssize_t do_pid(struct file *file, const char __user *ubuf,
  23623. + size_t cnt, loff_t *ppos)
  23624. +{
  23625. + char buf[64];
  23626. + unsigned long pid;
  23627. + unsigned long *this_pid = file->private_data;
  23628. +
  23629. + if (cnt >= sizeof(buf))
  23630. + return -EINVAL;
  23631. +
  23632. + if (copy_from_user(&buf, ubuf, cnt))
  23633. + return -EFAULT;
  23634. +
  23635. + buf[cnt] = '\0';
  23636. +
  23637. + if (kstrtoul(buf, 10, &pid))
  23638. + return -EINVAL;
  23639. +
  23640. + *this_pid = pid;
  23641. +
  23642. + return cnt;
  23643. +}
  23644. +#endif
  23645. +
  23646. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23647. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23648. +static ssize_t
  23649. +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  23650. +{
  23651. + int r;
  23652. + struct maxlatproc_data *mp = file->private_data;
  23653. + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
  23654. + unsigned long long t;
  23655. + unsigned long usecs, secs;
  23656. + char *buf;
  23657. +
  23658. + if (mp->pid == -1 || mp->current_pid == -1) {
  23659. + buf = "(none)\n";
  23660. + return simple_read_from_buffer(ubuf, cnt, ppos, buf,
  23661. + strlen(buf));
  23662. + }
  23663. +
  23664. + buf = kmalloc(strmaxlen, GFP_KERNEL);
  23665. + if (buf == NULL)
  23666. + return -ENOMEM;
  23667. +
  23668. + t = ns2usecs(mp->timestamp);
  23669. + usecs = do_div(t, USEC_PER_SEC);
  23670. + secs = (unsigned long) t;
  23671. + r = snprintf(buf, strmaxlen,
  23672. + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
  23673. + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
  23674. + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
  23675. + secs, usecs);
  23676. + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  23677. + kfree(buf);
  23678. + return r;
  23679. +}
  23680. +#endif
  23681. +
  23682. +static ssize_t
  23683. +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
  23684. +{
  23685. + char buf[64];
  23686. + struct enable_data *ed = file->private_data;
  23687. + int r;
  23688. +
  23689. + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
  23690. + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
  23691. +}
  23692. +
  23693. +static ssize_t
  23694. +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos)
  23695. +{
  23696. + char buf[64];
  23697. + long enable;
  23698. + struct enable_data *ed = file->private_data;
  23699. +
  23700. + if (cnt >= sizeof(buf))
  23701. + return -EINVAL;
  23702. +
  23703. + if (copy_from_user(&buf, ubuf, cnt))
  23704. + return -EFAULT;
  23705. +
  23706. + buf[cnt] = 0;
  23707. +
  23708. + if (kstrtoul(buf, 10, &enable))
  23709. + return -EINVAL;
  23710. +
  23711. + if ((enable && ed->enabled) || (!enable && !ed->enabled))
  23712. + return cnt;
  23713. +
  23714. + if (enable) {
  23715. + int ret;
  23716. +
  23717. + switch (ed->latency_type) {
  23718. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  23719. + case PREEMPTIRQSOFF_LATENCY:
  23720. + ret = register_trace_preemptirqsoff_hist(
  23721. + probe_preemptirqsoff_hist, NULL);
  23722. + if (ret) {
  23723. + pr_info("wakeup trace: Couldn't assign "
  23724. + "probe_preemptirqsoff_hist "
  23725. + "to trace_preemptirqsoff_hist\n");
  23726. + return ret;
  23727. + }
  23728. + break;
  23729. +#endif
  23730. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23731. + case WAKEUP_LATENCY:
  23732. + ret = register_trace_sched_wakeup(
  23733. + probe_wakeup_latency_hist_start, NULL);
  23734. + if (ret) {
  23735. + pr_info("wakeup trace: Couldn't assign "
  23736. + "probe_wakeup_latency_hist_start "
  23737. + "to trace_sched_wakeup\n");
  23738. + return ret;
  23739. + }
  23740. + ret = register_trace_sched_wakeup_new(
  23741. + probe_wakeup_latency_hist_start, NULL);
  23742. + if (ret) {
  23743. + pr_info("wakeup trace: Couldn't assign "
  23744. + "probe_wakeup_latency_hist_start "
  23745. + "to trace_sched_wakeup_new\n");
  23746. + unregister_trace_sched_wakeup(
  23747. + probe_wakeup_latency_hist_start, NULL);
  23748. + return ret;
  23749. + }
  23750. + ret = register_trace_sched_switch(
  23751. + probe_wakeup_latency_hist_stop, NULL);
  23752. + if (ret) {
  23753. + pr_info("wakeup trace: Couldn't assign "
  23754. + "probe_wakeup_latency_hist_stop "
  23755. + "to trace_sched_switch\n");
  23756. + unregister_trace_sched_wakeup(
  23757. + probe_wakeup_latency_hist_start, NULL);
  23758. + unregister_trace_sched_wakeup_new(
  23759. + probe_wakeup_latency_hist_start, NULL);
  23760. + return ret;
  23761. + }
  23762. + ret = register_trace_sched_migrate_task(
  23763. + probe_sched_migrate_task, NULL);
  23764. + if (ret) {
  23765. + pr_info("wakeup trace: Couldn't assign "
  23766. + "probe_sched_migrate_task "
  23767. + "to trace_sched_migrate_task\n");
  23768. + unregister_trace_sched_wakeup(
  23769. + probe_wakeup_latency_hist_start, NULL);
  23770. + unregister_trace_sched_wakeup_new(
  23771. + probe_wakeup_latency_hist_start, NULL);
  23772. + unregister_trace_sched_switch(
  23773. + probe_wakeup_latency_hist_stop, NULL);
  23774. + return ret;
  23775. + }
  23776. + break;
  23777. +#endif
  23778. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23779. + case MISSED_TIMER_OFFSETS:
  23780. + ret = register_trace_hrtimer_interrupt(
  23781. + probe_hrtimer_interrupt, NULL);
  23782. + if (ret) {
  23783. + pr_info("wakeup trace: Couldn't assign "
  23784. + "probe_hrtimer_interrupt "
  23785. + "to trace_hrtimer_interrupt\n");
  23786. + return ret;
  23787. + }
  23788. + break;
  23789. +#endif
  23790. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  23791. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23792. + case TIMERANDWAKEUP_LATENCY:
  23793. + if (!wakeup_latency_enabled_data.enabled ||
  23794. + !missed_timer_offsets_enabled_data.enabled)
  23795. + return -EINVAL;
  23796. + break;
  23797. +#endif
  23798. + default:
  23799. + break;
  23800. + }
  23801. + } else {
  23802. + switch (ed->latency_type) {
  23803. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  23804. + case PREEMPTIRQSOFF_LATENCY:
  23805. + {
  23806. + int cpu;
  23807. +
  23808. + unregister_trace_preemptirqsoff_hist(
  23809. + probe_preemptirqsoff_hist, NULL);
  23810. + for_each_online_cpu(cpu) {
  23811. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23812. + per_cpu(hist_irqsoff_counting,
  23813. + cpu) = 0;
  23814. +#endif
  23815. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23816. + per_cpu(hist_preemptoff_counting,
  23817. + cpu) = 0;
  23818. +#endif
  23819. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23820. + per_cpu(hist_preemptirqsoff_counting,
  23821. + cpu) = 0;
  23822. +#endif
  23823. + }
  23824. + }
  23825. + break;
  23826. +#endif
  23827. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23828. + case WAKEUP_LATENCY:
  23829. + {
  23830. + int cpu;
  23831. +
  23832. + unregister_trace_sched_wakeup(
  23833. + probe_wakeup_latency_hist_start, NULL);
  23834. + unregister_trace_sched_wakeup_new(
  23835. + probe_wakeup_latency_hist_start, NULL);
  23836. + unregister_trace_sched_switch(
  23837. + probe_wakeup_latency_hist_stop, NULL);
  23838. + unregister_trace_sched_migrate_task(
  23839. + probe_sched_migrate_task, NULL);
  23840. +
  23841. + for_each_online_cpu(cpu) {
  23842. + per_cpu(wakeup_task, cpu) = NULL;
  23843. + per_cpu(wakeup_sharedprio, cpu) = 0;
  23844. + }
  23845. + }
  23846. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23847. + timerandwakeup_enabled_data.enabled = 0;
  23848. +#endif
  23849. + break;
  23850. +#endif
  23851. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  23852. + case MISSED_TIMER_OFFSETS:
  23853. + unregister_trace_hrtimer_interrupt(
  23854. + probe_hrtimer_interrupt, NULL);
  23855. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23856. + timerandwakeup_enabled_data.enabled = 0;
  23857. +#endif
  23858. + break;
  23859. +#endif
  23860. + default:
  23861. + break;
  23862. + }
  23863. + }
  23864. + ed->enabled = enable;
  23865. + return cnt;
  23866. +}
  23867. +
  23868. +static const struct file_operations latency_hist_reset_fops = {
  23869. + .open = tracing_open_generic,
  23870. + .write = latency_hist_reset,
  23871. +};
  23872. +
  23873. +static const struct file_operations enable_fops = {
  23874. + .open = tracing_open_generic,
  23875. + .read = show_enable,
  23876. + .write = do_enable,
  23877. +};
  23878. +
  23879. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  23880. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  23881. +static const struct file_operations pid_fops = {
  23882. + .open = tracing_open_generic,
  23883. + .read = show_pid,
  23884. + .write = do_pid,
  23885. +};
  23886. +
  23887. +static const struct file_operations maxlatproc_fops = {
  23888. + .open = tracing_open_generic,
  23889. + .read = show_maxlatproc,
  23890. +};
  23891. +#endif
  23892. +
  23893. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  23894. +static notrace void probe_preemptirqsoff_hist(void *v, int reason,
  23895. + int starthist)
  23896. +{
  23897. + int cpu = raw_smp_processor_id();
  23898. + int time_set = 0;
  23899. +
  23900. + if (starthist) {
  23901. + cycle_t uninitialized_var(start);
  23902. +
  23903. + if (!preempt_count() && !irqs_disabled())
  23904. + return;
  23905. +
  23906. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23907. + if ((reason == IRQS_OFF || reason == TRACE_START) &&
  23908. + !per_cpu(hist_irqsoff_counting, cpu)) {
  23909. + per_cpu(hist_irqsoff_counting, cpu) = 1;
  23910. + start = ftrace_now(cpu);
  23911. + time_set++;
  23912. + per_cpu(hist_irqsoff_start, cpu) = start;
  23913. + }
  23914. +#endif
  23915. +
  23916. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23917. + if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
  23918. + !per_cpu(hist_preemptoff_counting, cpu)) {
  23919. + per_cpu(hist_preemptoff_counting, cpu) = 1;
  23920. + if (!(time_set++))
  23921. + start = ftrace_now(cpu);
  23922. + per_cpu(hist_preemptoff_start, cpu) = start;
  23923. + }
  23924. +#endif
  23925. +
  23926. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23927. + if (per_cpu(hist_irqsoff_counting, cpu) &&
  23928. + per_cpu(hist_preemptoff_counting, cpu) &&
  23929. + !per_cpu(hist_preemptirqsoff_counting, cpu)) {
  23930. + per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
  23931. + if (!time_set)
  23932. + start = ftrace_now(cpu);
  23933. + per_cpu(hist_preemptirqsoff_start, cpu) = start;
  23934. + }
  23935. +#endif
  23936. + } else {
  23937. + cycle_t uninitialized_var(stop);
  23938. +
  23939. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  23940. + if ((reason == IRQS_ON || reason == TRACE_STOP) &&
  23941. + per_cpu(hist_irqsoff_counting, cpu)) {
  23942. + cycle_t start = per_cpu(hist_irqsoff_start, cpu);
  23943. +
  23944. + stop = ftrace_now(cpu);
  23945. + time_set++;
  23946. + if (start) {
  23947. + long latency = ((long) (stop - start)) /
  23948. + NSECS_PER_USECS;
  23949. +
  23950. + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
  23951. + stop, NULL);
  23952. + }
  23953. + per_cpu(hist_irqsoff_counting, cpu) = 0;
  23954. + }
  23955. +#endif
  23956. +
  23957. +#ifdef CONFIG_PREEMPT_OFF_HIST
  23958. + if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
  23959. + per_cpu(hist_preemptoff_counting, cpu)) {
  23960. + cycle_t start = per_cpu(hist_preemptoff_start, cpu);
  23961. +
  23962. + if (!(time_set++))
  23963. + stop = ftrace_now(cpu);
  23964. + if (start) {
  23965. + long latency = ((long) (stop - start)) /
  23966. + NSECS_PER_USECS;
  23967. +
  23968. + latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
  23969. + 0, stop, NULL);
  23970. + }
  23971. + per_cpu(hist_preemptoff_counting, cpu) = 0;
  23972. + }
  23973. +#endif
  23974. +
  23975. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  23976. + if ((!per_cpu(hist_irqsoff_counting, cpu) ||
  23977. + !per_cpu(hist_preemptoff_counting, cpu)) &&
  23978. + per_cpu(hist_preemptirqsoff_counting, cpu)) {
  23979. + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);
  23980. +
  23981. + if (!time_set)
  23982. + stop = ftrace_now(cpu);
  23983. + if (start) {
  23984. + long latency = ((long) (stop - start)) /
  23985. + NSECS_PER_USECS;
  23986. +
  23987. + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
  23988. + latency, 0, stop, NULL);
  23989. + }
  23990. + per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
  23991. + }
  23992. +#endif
  23993. + }
  23994. +}
  23995. +#endif
  23996. +
  23997. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  23998. +static DEFINE_RAW_SPINLOCK(wakeup_lock);
  23999. +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
  24000. + int cpu)
  24001. +{
  24002. + int old_cpu = task_cpu(task);
  24003. +
  24004. + if (cpu != old_cpu) {
  24005. + unsigned long flags;
  24006. + struct task_struct *cpu_wakeup_task;
  24007. +
  24008. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  24009. +
  24010. + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
  24011. + if (task == cpu_wakeup_task) {
  24012. + put_task_struct(cpu_wakeup_task);
  24013. + per_cpu(wakeup_task, old_cpu) = NULL;
  24014. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
  24015. + get_task_struct(cpu_wakeup_task);
  24016. + }
  24017. +
  24018. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  24019. + }
  24020. +}
  24021. +
  24022. +static notrace void probe_wakeup_latency_hist_start(void *v,
  24023. + struct task_struct *p)
  24024. +{
  24025. + unsigned long flags;
  24026. + struct task_struct *curr = current;
  24027. + int cpu = task_cpu(p);
  24028. + struct task_struct *cpu_wakeup_task;
  24029. +
  24030. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  24031. +
  24032. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  24033. +
  24034. + if (wakeup_pid) {
  24035. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  24036. + p->prio == curr->prio)
  24037. + per_cpu(wakeup_sharedprio, cpu) = 1;
  24038. + if (likely(wakeup_pid != task_pid_nr(p)))
  24039. + goto out;
  24040. + } else {
  24041. + if (likely(!rt_task(p)) ||
  24042. + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
  24043. + p->prio > curr->prio)
  24044. + goto out;
  24045. + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
  24046. + p->prio == curr->prio)
  24047. + per_cpu(wakeup_sharedprio, cpu) = 1;
  24048. + }
  24049. +
  24050. + if (cpu_wakeup_task)
  24051. + put_task_struct(cpu_wakeup_task);
  24052. + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
  24053. + get_task_struct(cpu_wakeup_task);
  24054. + cpu_wakeup_task->preempt_timestamp_hist =
  24055. + ftrace_now(raw_smp_processor_id());
  24056. +out:
  24057. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  24058. +}
  24059. +
  24060. +static notrace void probe_wakeup_latency_hist_stop(void *v,
  24061. + struct task_struct *prev, struct task_struct *next)
  24062. +{
  24063. + unsigned long flags;
  24064. + int cpu = task_cpu(next);
  24065. + long latency;
  24066. + cycle_t stop;
  24067. + struct task_struct *cpu_wakeup_task;
  24068. +
  24069. + raw_spin_lock_irqsave(&wakeup_lock, flags);
  24070. +
  24071. + cpu_wakeup_task = per_cpu(wakeup_task, cpu);
  24072. +
  24073. + if (cpu_wakeup_task == NULL)
  24074. + goto out;
  24075. +
  24076. + /* Already running? */
  24077. + if (unlikely(current == cpu_wakeup_task))
  24078. + goto out_reset;
  24079. +
  24080. + if (next != cpu_wakeup_task) {
  24081. + if (next->prio < cpu_wakeup_task->prio)
  24082. + goto out_reset;
  24083. +
  24084. + if (next->prio == cpu_wakeup_task->prio)
  24085. + per_cpu(wakeup_sharedprio, cpu) = 1;
  24086. +
  24087. + goto out;
  24088. + }
  24089. +
  24090. + if (current->prio == cpu_wakeup_task->prio)
  24091. + per_cpu(wakeup_sharedprio, cpu) = 1;
  24092. +
  24093. + /*
  24094. + * The task we are waiting for is about to be switched to.
  24095. + * Calculate latency and store it in histogram.
  24096. + */
  24097. + stop = ftrace_now(raw_smp_processor_id());
  24098. +
  24099. + latency = ((long) (stop - next->preempt_timestamp_hist)) /
  24100. + NSECS_PER_USECS;
  24101. +
  24102. + if (per_cpu(wakeup_sharedprio, cpu)) {
  24103. + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
  24104. + next);
  24105. + per_cpu(wakeup_sharedprio, cpu) = 0;
  24106. + } else {
  24107. + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
  24108. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24109. + if (timerandwakeup_enabled_data.enabled) {
  24110. + latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
  24111. + next->timer_offset + latency, next->timer_offset,
  24112. + stop, next);
  24113. + }
  24114. +#endif
  24115. + }
  24116. +
  24117. +out_reset:
  24118. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24119. + next->timer_offset = 0;
  24120. +#endif
  24121. + put_task_struct(cpu_wakeup_task);
  24122. + per_cpu(wakeup_task, cpu) = NULL;
  24123. +out:
  24124. + raw_spin_unlock_irqrestore(&wakeup_lock, flags);
  24125. +}
  24126. +#endif
  24127. +
  24128. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24129. +static notrace void probe_hrtimer_interrupt(void *v, int cpu,
  24130. + long long latency_ns, struct task_struct *curr,
  24131. + struct task_struct *task)
  24132. +{
  24133. + if (latency_ns <= 0 && task != NULL && rt_task(task) &&
  24134. + (task->prio < curr->prio ||
  24135. + (task->prio == curr->prio &&
  24136. + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
  24137. + long latency;
  24138. + cycle_t now;
  24139. +
  24140. + if (missed_timer_offsets_pid) {
  24141. + if (likely(missed_timer_offsets_pid !=
  24142. + task_pid_nr(task)))
  24143. + return;
  24144. + }
  24145. +
  24146. + now = ftrace_now(cpu);
  24147. + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
  24148. + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
  24149. + task);
  24150. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  24151. + task->timer_offset = latency;
  24152. +#endif
  24153. + }
  24154. +}
  24155. +#endif
  24156. +
  24157. +static __init int latency_hist_init(void)
  24158. +{
  24159. + struct dentry *latency_hist_root = NULL;
  24160. + struct dentry *dentry;
  24161. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  24162. + struct dentry *dentry_sharedprio;
  24163. +#endif
  24164. + struct dentry *entry;
  24165. + struct dentry *enable_root;
  24166. + int i = 0;
  24167. + struct hist_data *my_hist;
  24168. + char name[64];
  24169. + char *cpufmt = "CPU%d";
  24170. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
  24171. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24172. + char *cpufmt_maxlatproc = "max_latency-CPU%d";
  24173. + struct maxlatproc_data *mp = NULL;
  24174. +#endif
  24175. +
  24176. + dentry = tracing_init_dentry();
  24177. + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry);
  24178. + enable_root = debugfs_create_dir("enable", latency_hist_root);
  24179. +
  24180. +#ifdef CONFIG_INTERRUPT_OFF_HIST
  24181. + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root);
  24182. + for_each_possible_cpu(i) {
  24183. + sprintf(name, cpufmt, i);
  24184. + entry = debugfs_create_file(name, 0444, dentry,
  24185. + &per_cpu(irqsoff_hist, i), &latency_hist_fops);
  24186. + my_hist = &per_cpu(irqsoff_hist, i);
  24187. + atomic_set(&my_hist->hist_mode, 1);
  24188. + my_hist->min_lat = LONG_MAX;
  24189. + }
  24190. + entry = debugfs_create_file("reset", 0644, dentry,
  24191. + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops);
  24192. +#endif
  24193. +
  24194. +#ifdef CONFIG_PREEMPT_OFF_HIST
  24195. + dentry = debugfs_create_dir(preemptoff_hist_dir,
  24196. + latency_hist_root);
  24197. + for_each_possible_cpu(i) {
  24198. + sprintf(name, cpufmt, i);
  24199. + entry = debugfs_create_file(name, 0444, dentry,
  24200. + &per_cpu(preemptoff_hist, i), &latency_hist_fops);
  24201. + my_hist = &per_cpu(preemptoff_hist, i);
  24202. + atomic_set(&my_hist->hist_mode, 1);
  24203. + my_hist->min_lat = LONG_MAX;
  24204. + }
  24205. + entry = debugfs_create_file("reset", 0644, dentry,
  24206. + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops);
  24207. +#endif
  24208. +
  24209. +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
  24210. + dentry = debugfs_create_dir(preemptirqsoff_hist_dir,
  24211. + latency_hist_root);
  24212. + for_each_possible_cpu(i) {
  24213. + sprintf(name, cpufmt, i);
  24214. + entry = debugfs_create_file(name, 0444, dentry,
  24215. + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops);
  24216. + my_hist = &per_cpu(preemptirqsoff_hist, i);
  24217. + atomic_set(&my_hist->hist_mode, 1);
  24218. + my_hist->min_lat = LONG_MAX;
  24219. + }
  24220. + entry = debugfs_create_file("reset", 0644, dentry,
  24221. + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops);
  24222. +#endif
  24223. +
  24224. +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
  24225. + entry = debugfs_create_file("preemptirqsoff", 0644,
  24226. + enable_root, (void *)&preemptirqsoff_enabled_data,
  24227. + &enable_fops);
  24228. +#endif
  24229. +
  24230. +#ifdef CONFIG_WAKEUP_LATENCY_HIST
  24231. + dentry = debugfs_create_dir(wakeup_latency_hist_dir,
  24232. + latency_hist_root);
  24233. + dentry_sharedprio = debugfs_create_dir(
  24234. + wakeup_latency_hist_dir_sharedprio, dentry);
  24235. + for_each_possible_cpu(i) {
  24236. + sprintf(name, cpufmt, i);
  24237. +
  24238. + entry = debugfs_create_file(name, 0444, dentry,
  24239. + &per_cpu(wakeup_latency_hist, i),
  24240. + &latency_hist_fops);
  24241. + my_hist = &per_cpu(wakeup_latency_hist, i);
  24242. + atomic_set(&my_hist->hist_mode, 1);
  24243. + my_hist->min_lat = LONG_MAX;
  24244. +
  24245. + entry = debugfs_create_file(name, 0444, dentry_sharedprio,
  24246. + &per_cpu(wakeup_latency_hist_sharedprio, i),
  24247. + &latency_hist_fops);
  24248. + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i);
  24249. + atomic_set(&my_hist->hist_mode, 1);
  24250. + my_hist->min_lat = LONG_MAX;
  24251. +
  24252. + sprintf(name, cpufmt_maxlatproc, i);
  24253. +
  24254. + mp = &per_cpu(wakeup_maxlatproc, i);
  24255. + entry = debugfs_create_file(name, 0444, dentry, mp,
  24256. + &maxlatproc_fops);
  24257. + clear_maxlatprocdata(mp);
  24258. +
  24259. + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i);
  24260. + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp,
  24261. + &maxlatproc_fops);
  24262. + clear_maxlatprocdata(mp);
  24263. + }
  24264. + entry = debugfs_create_file("pid", 0644, dentry,
  24265. + (void *)&wakeup_pid, &pid_fops);
  24266. + entry = debugfs_create_file("reset", 0644, dentry,
  24267. + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops);
  24268. + entry = debugfs_create_file("reset", 0644, dentry_sharedprio,
  24269. + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops);
  24270. + entry = debugfs_create_file("wakeup", 0644,
  24271. + enable_root, (void *)&wakeup_latency_enabled_data,
  24272. + &enable_fops);
  24273. +#endif
  24274. +
  24275. +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
  24276. + dentry = debugfs_create_dir(missed_timer_offsets_dir,
  24277. + latency_hist_root);
  24278. + for_each_possible_cpu(i) {
  24279. + sprintf(name, cpufmt, i);
  24280. + entry = debugfs_create_file(name, 0444, dentry,
  24281. + &per_cpu(missed_timer_offsets, i), &latency_hist_fops);
  24282. + my_hist = &per_cpu(missed_timer_offsets, i);
  24283. + atomic_set(&my_hist->hist_mode, 1);
  24284. + my_hist->min_lat = LONG_MAX;
  24285. +
  24286. + sprintf(name, cpufmt_maxlatproc, i);
  24287. + mp = &per_cpu(missed_timer_offsets_maxlatproc, i);
  24288. + entry = debugfs_create_file(name, 0444, dentry, mp,
  24289. + &maxlatproc_fops);
  24290. + clear_maxlatprocdata(mp);
  24291. + }
  24292. + entry = debugfs_create_file("pid", 0644, dentry,
  24293. + (void *)&missed_timer_offsets_pid, &pid_fops);
  24294. + entry = debugfs_create_file("reset", 0644, dentry,
  24295. + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops);
  24296. + entry = debugfs_create_file("missed_timer_offsets", 0644,
  24297. + enable_root, (void *)&missed_timer_offsets_enabled_data,
  24298. + &enable_fops);
  24299. +#endif
  24300. +
  24301. +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
  24302. + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
  24303. + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
  24304. + latency_hist_root);
  24305. + for_each_possible_cpu(i) {
  24306. + sprintf(name, cpufmt, i);
  24307. + entry = debugfs_create_file(name, 0444, dentry,
  24308. + &per_cpu(timerandwakeup_latency_hist, i),
  24309. + &latency_hist_fops);
  24310. + my_hist = &per_cpu(timerandwakeup_latency_hist, i);
  24311. + atomic_set(&my_hist->hist_mode, 1);
  24312. + my_hist->min_lat = LONG_MAX;
  24313. +
  24314. + sprintf(name, cpufmt_maxlatproc, i);
  24315. + mp = &per_cpu(timerandwakeup_maxlatproc, i);
  24316. + entry = debugfs_create_file(name, 0444, dentry, mp,
  24317. + &maxlatproc_fops);
  24318. + clear_maxlatprocdata(mp);
  24319. + }
  24320. + entry = debugfs_create_file("reset", 0644, dentry,
  24321. + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops);
  24322. + entry = debugfs_create_file("timerandwakeup", 0644,
  24323. + enable_root, (void *)&timerandwakeup_enabled_data,
  24324. + &enable_fops);
  24325. +#endif
  24326. + return 0;
  24327. +}
  24328. +
  24329. +device_initcall(latency_hist_init);
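The latency_hist code added above follows one pattern throughout: a probe converts a cycle delta into microseconds, latency_hist() drops the sample into a per-CPU bucket array while updating min/max/overflow counters, and latency_hist_init() seeds min_lat with LONG_MAX so the first sample becomes the minimum. Below is a minimal userspace sketch of that bookkeeping, not part of the patch; the bucket count, struct layout and output format are placeholders, not the kernel's.

/* histo_sketch.c -- illustrative only; not part of the patch.
 * Build: cc -o histo_sketch histo_sketch.c
 */
#include <limits.h>
#include <stdio.h>

#define BUCKETS 1024            /* placeholder: one bucket per microsecond */

struct hist {
	unsigned long long bucket[BUCKETS];
	unsigned long long above;   /* samples >= BUCKETS (overflow) */
	unsigned long long total;
	long min, max;
	long long sum;
};

static void hist_init(struct hist *h)
{
	*h = (struct hist){ .min = LONG_MAX, .max = LONG_MIN };
}

static void hist_account(struct hist *h, long latency_us)
{
	if (latency_us < 0)
		latency_us = 0;
	if (latency_us < BUCKETS)
		h->bucket[latency_us]++;
	else
		h->above++;
	if (latency_us < h->min)
		h->min = latency_us;
	if (latency_us > h->max)
		h->max = latency_us;
	h->total++;
	h->sum += latency_us;
}

int main(void)
{
	struct hist h;
	long samples[] = { 3, 7, 7, 12, 2048 };   /* fake latencies in usecs */
	size_t i;

	hist_init(&h);
	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		hist_account(&h, samples[i]);

	printf("samples: %llu  min: %ld  max: %ld  avg: %lld  overflow: %llu\n",
	       h.total, h.min, h.max, h.sum / (long long)h.total, h.above);
	for (i = 0; i < BUCKETS; i++)
		if (h.bucket[i])
			printf("%zu us: %llu\n", i, h.bucket[i]);
	return 0;
}

In the kernel version the same accounting runs once per CPU and per histogram type behind the debugfs files created above; the sketch collapses that to a single instance.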
  24330. diff -Nur linux-4.1.39.orig/kernel/trace/Makefile linux-4.1.39/kernel/trace/Makefile
  24331. --- linux-4.1.39.orig/kernel/trace/Makefile 2017-03-13 21:04:36.000000000 +0100
  24332. +++ linux-4.1.39/kernel/trace/Makefile 2017-04-18 17:56:30.641398216 +0200
  24333. @@ -36,6 +36,10 @@
  24334. obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
  24335. obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
  24336. obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
  24337. +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o
  24338. +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o
  24339. +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o
  24340. +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o
  24341. obj-$(CONFIG_NOP_TRACER) += trace_nop.o
  24342. obj-$(CONFIG_STACK_TRACER) += trace_stack.o
  24343. obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
  24344. diff -Nur linux-4.1.39.orig/kernel/trace/trace.c linux-4.1.39/kernel/trace/trace.c
  24345. --- linux-4.1.39.orig/kernel/trace/trace.c 2017-03-13 21:04:36.000000000 +0100
  24346. +++ linux-4.1.39/kernel/trace/trace.c 2017-04-18 17:56:30.641398216 +0200
  24347. @@ -1630,6 +1630,7 @@
  24348. struct task_struct *tsk = current;
  24349. entry->preempt_count = pc & 0xff;
  24350. + entry->preempt_lazy_count = preempt_lazy_count();
  24351. entry->pid = (tsk) ? tsk->pid : 0;
  24352. entry->flags =
  24353. #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
  24354. @@ -1639,8 +1640,11 @@
  24355. #endif
  24356. ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
  24357. ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
  24358. - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
  24359. + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) |
  24360. + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) |
  24361. (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
  24362. +
  24363. + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
  24364. }
  24365. EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
  24366. @@ -2558,14 +2562,17 @@
  24367. static void print_lat_help_header(struct seq_file *m)
  24368. {
  24369. - seq_puts(m, "# _------=> CPU# \n"
  24370. - "# / _-----=> irqs-off \n"
  24371. - "# | / _----=> need-resched \n"
  24372. - "# || / _---=> hardirq/softirq \n"
  24373. - "# ||| / _--=> preempt-depth \n"
  24374. - "# |||| / delay \n"
  24375. - "# cmd pid ||||| time | caller \n"
  24376. - "# \\ / ||||| \\ | / \n");
  24377. + seq_puts(m, "# _--------=> CPU# \n"
  24378. + "# / _-------=> irqs-off \n"
  24379. + "# | / _------=> need-resched \n"
  24380. + "# || / _-----=> need-resched_lazy \n"
  24381. + "# ||| / _----=> hardirq/softirq \n"
  24382. + "# |||| / _---=> preempt-depth \n"
  24383. + "# ||||| / _--=> preempt-lazy-depth\n"
  24384. + "# |||||| / _-=> migrate-disable \n"
  24385. + "# ||||||| / delay \n"
  24386. + "# cmd pid |||||||| time | caller \n"
  24387. + "# \\ / |||||||| \\ | / \n");
  24388. }
  24389. static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
  24390. @@ -2591,11 +2598,14 @@
  24391. print_event_info(buf, m);
  24392. seq_puts(m, "# _-----=> irqs-off\n"
  24393. "# / _----=> need-resched\n"
  24394. - "# | / _---=> hardirq/softirq\n"
  24395. - "# || / _--=> preempt-depth\n"
  24396. - "# ||| / delay\n"
  24397. - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
  24398. - "# | | | |||| | |\n");
  24399. + "# |/ _-----=> need-resched_lazy\n"
  24400. + "# || / _---=> hardirq/softirq\n"
  24401. + "# ||| / _--=> preempt-depth\n"
  24402. + "# |||| / _-=> preempt-lazy-depth\n"
  24403. + "# ||||| / _-=> migrate-disable \n"
  24404. + "# |||||| / delay\n"
  24405. + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n"
  24406. + "# | | | ||||||| | |\n");
  24407. }
  24408. void
  24409. diff -Nur linux-4.1.39.orig/kernel/trace/trace_events.c linux-4.1.39/kernel/trace/trace_events.c
  24410. --- linux-4.1.39.orig/kernel/trace/trace_events.c 2017-03-13 21:04:36.000000000 +0100
  24411. +++ linux-4.1.39/kernel/trace/trace_events.c 2017-04-18 17:56:30.645398372 +0200
  24412. @@ -162,6 +162,8 @@
  24413. __common_field(unsigned char, flags);
  24414. __common_field(unsigned char, preempt_count);
  24415. __common_field(int, pid);
  24416. + __common_field(unsigned short, migrate_disable);
  24417. + __common_field(unsigned short, padding);
  24418. return ret;
  24419. }
  24420. @@ -198,6 +200,14 @@
  24421. local_save_flags(fbuffer->flags);
  24422. fbuffer->pc = preempt_count();
  24423. + /*
  24424. + * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
  24425. + * preemption (adding one to the preempt_count). Since we are
  24426. + * interested in the preempt_count at the time the tracepoint was
  24427. + * hit, we need to subtract one to offset the increment.
  24428. + */
  24429. + if (IS_ENABLED(CONFIG_PREEMPT))
  24430. + fbuffer->pc--;
  24431. fbuffer->ftrace_file = ftrace_file;
  24432. fbuffer->event =
  24433. diff -Nur linux-4.1.39.orig/kernel/trace/trace.h linux-4.1.39/kernel/trace/trace.h
  24434. --- linux-4.1.39.orig/kernel/trace/trace.h 2017-03-13 21:04:36.000000000 +0100
  24435. +++ linux-4.1.39/kernel/trace/trace.h 2017-04-18 17:56:30.641398216 +0200
  24436. @@ -120,6 +120,7 @@
  24437. * NEED_RESCHED - reschedule is requested
  24438. * HARDIRQ - inside an interrupt handler
  24439. * SOFTIRQ - inside a softirq handler
  24440. + * NEED_RESCHED_LAZY - lazy reschedule is requested
  24441. */
  24442. enum trace_flag_type {
  24443. TRACE_FLAG_IRQS_OFF = 0x01,
  24444. @@ -128,6 +129,7 @@
  24445. TRACE_FLAG_HARDIRQ = 0x08,
  24446. TRACE_FLAG_SOFTIRQ = 0x10,
  24447. TRACE_FLAG_PREEMPT_RESCHED = 0x20,
  24448. + TRACE_FLAG_NEED_RESCHED_LAZY = 0x40,
  24449. };
  24450. #define TRACE_BUF_SIZE 1024
  24451. diff -Nur linux-4.1.39.orig/kernel/trace/trace_irqsoff.c linux-4.1.39/kernel/trace/trace_irqsoff.c
  24452. --- linux-4.1.39.orig/kernel/trace/trace_irqsoff.c 2017-03-13 21:04:36.000000000 +0100
  24453. +++ linux-4.1.39/kernel/trace/trace_irqsoff.c 2017-04-18 17:56:30.645398372 +0200
  24454. @@ -13,6 +13,7 @@
  24455. #include <linux/uaccess.h>
  24456. #include <linux/module.h>
  24457. #include <linux/ftrace.h>
  24458. +#include <trace/events/hist.h>
  24459. #include "trace.h"
  24460. @@ -433,11 +434,13 @@
  24461. {
  24462. if (preempt_trace() || irq_trace())
  24463. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24464. + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1);
  24465. }
  24466. EXPORT_SYMBOL_GPL(start_critical_timings);
  24467. void stop_critical_timings(void)
  24468. {
  24469. + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0);
  24470. if (preempt_trace() || irq_trace())
  24471. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24472. }
  24473. @@ -447,6 +450,7 @@
  24474. #ifdef CONFIG_PROVE_LOCKING
  24475. void time_hardirqs_on(unsigned long a0, unsigned long a1)
  24476. {
  24477. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  24478. if (!preempt_trace() && irq_trace())
  24479. stop_critical_timing(a0, a1);
  24480. }
  24481. @@ -455,6 +459,7 @@
  24482. {
  24483. if (!preempt_trace() && irq_trace())
  24484. start_critical_timing(a0, a1);
  24485. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  24486. }
  24487. #else /* !CONFIG_PROVE_LOCKING */
  24488. @@ -480,6 +485,7 @@
  24489. */
  24490. void trace_hardirqs_on(void)
  24491. {
  24492. + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);
  24493. if (!preempt_trace() && irq_trace())
  24494. stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24495. }
  24496. @@ -489,11 +495,13 @@
  24497. {
  24498. if (!preempt_trace() && irq_trace())
  24499. start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  24500. + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);
  24501. }
  24502. EXPORT_SYMBOL(trace_hardirqs_off);
  24503. __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
  24504. {
  24505. + trace_preemptirqsoff_hist(IRQS_ON, 0);
  24506. if (!preempt_trace() && irq_trace())
  24507. stop_critical_timing(CALLER_ADDR0, caller_addr);
  24508. }
  24509. @@ -503,6 +511,7 @@
  24510. {
  24511. if (!preempt_trace() && irq_trace())
  24512. start_critical_timing(CALLER_ADDR0, caller_addr);
  24513. + trace_preemptirqsoff_hist(IRQS_OFF, 1);
  24514. }
  24515. EXPORT_SYMBOL(trace_hardirqs_off_caller);
  24516. @@ -512,12 +521,14 @@
  24517. #ifdef CONFIG_PREEMPT_TRACER
  24518. void trace_preempt_on(unsigned long a0, unsigned long a1)
  24519. {
  24520. + trace_preemptirqsoff_hist(PREEMPT_ON, 0);
  24521. if (preempt_trace() && !irq_trace())
  24522. stop_critical_timing(a0, a1);
  24523. }
  24524. void trace_preempt_off(unsigned long a0, unsigned long a1)
  24525. {
  24526. + trace_preemptirqsoff_hist(PREEMPT_ON, 1);
  24527. if (preempt_trace() && !irq_trace())
  24528. start_critical_timing(a0, a1);
  24529. }
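The hooks added to trace_irqsoff.c bracket every irqs-off and preempt-off region: the OFF/TRACE_START call makes probe_preemptirqsoff_hist() (earlier in this patch) store a per-CPU start timestamp, and the matching ON/TRACE_STOP call turns the difference into a microsecond latency for the histogram. A rough userspace analogue of that enter/exit pairing follows; it is not part of the patch and uses CLOCK_MONOTONIC where the kernel code uses ftrace_now().

/* offsection_sketch.c -- illustrative only; not part of the patch.
 * Build: cc -o offsection_sketch offsection_sketch.c
 */
#include <stdio.h>
#include <time.h>

static struct timespec section_start;
static int counting;                 /* mirrors hist_irqsoff_counting */

static void section_enter(void)      /* cf. IRQS_OFF / TRACE_START */
{
	if (!counting) {
		counting = 1;
		clock_gettime(CLOCK_MONOTONIC, &section_start);
	}
}

static long section_exit(void)       /* cf. IRQS_ON / TRACE_STOP */
{
	struct timespec now;
	long usecs;

	if (!counting)
		return -1;
	clock_gettime(CLOCK_MONOTONIC, &now);
	usecs = (now.tv_sec - section_start.tv_sec) * 1000000L +
		(now.tv_nsec - section_start.tv_nsec) / 1000L;
	counting = 0;
	return usecs;                /* would be fed to the histogram */
}

int main(void)
{
	section_enter();
	for (volatile long i = 0; i < 1000000; i++)
		;                    /* stand-in for the critical section */
	printf("section length: %ld us\n", section_exit());
	return 0;
}

The kernel variant keeps separate per-CPU counting flags per cause (irqs off, preemption off, both) so nested enters do not restart the clock, as the hist_*_counting checks in probe_preemptirqsoff_hist() above show.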
  24530. diff -Nur linux-4.1.39.orig/kernel/trace/trace_output.c linux-4.1.39/kernel/trace/trace_output.c
  24531. --- linux-4.1.39.orig/kernel/trace/trace_output.c 2017-03-13 21:04:36.000000000 +0100
  24532. +++ linux-4.1.39/kernel/trace/trace_output.c 2017-04-18 17:56:30.645398372 +0200
  24533. @@ -430,6 +430,7 @@
  24534. {
  24535. char hardsoft_irq;
  24536. char need_resched;
  24537. + char need_resched_lazy;
  24538. char irqs_off;
  24539. int hardirq;
  24540. int softirq;
  24541. @@ -457,6 +458,8 @@
  24542. need_resched = '.';
  24543. break;
  24544. }
  24545. + need_resched_lazy =
  24546. + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
  24547. hardsoft_irq =
  24548. (hardirq && softirq) ? 'H' :
  24549. @@ -464,14 +467,25 @@
  24550. softirq ? 's' :
  24551. '.';
  24552. - trace_seq_printf(s, "%c%c%c",
  24553. - irqs_off, need_resched, hardsoft_irq);
  24554. + trace_seq_printf(s, "%c%c%c%c",
  24555. + irqs_off, need_resched, need_resched_lazy,
  24556. + hardsoft_irq);
  24557. if (entry->preempt_count)
  24558. trace_seq_printf(s, "%x", entry->preempt_count);
  24559. else
  24560. trace_seq_putc(s, '.');
  24561. + if (entry->preempt_lazy_count)
  24562. + trace_seq_printf(s, "%x", entry->preempt_lazy_count);
  24563. + else
  24564. + trace_seq_putc(s, '.');
  24565. +
  24566. + if (entry->migrate_disable)
  24567. + trace_seq_printf(s, "%x", entry->migrate_disable);
  24568. + else
  24569. + trace_seq_putc(s, '.');
  24570. +
  24571. return !trace_seq_has_overflowed(s);
  24572. }
  24573. diff -Nur linux-4.1.39.orig/kernel/trace/trace_sched_switch.c linux-4.1.39/kernel/trace/trace_sched_switch.c
  24574. --- linux-4.1.39.orig/kernel/trace/trace_sched_switch.c 2017-03-13 21:04:36.000000000 +0100
  24575. +++ linux-4.1.39/kernel/trace/trace_sched_switch.c 2017-04-18 17:56:30.645398372 +0200
  24576. @@ -26,7 +26,7 @@
  24577. }
  24578. static void
  24579. -probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
  24580. +probe_sched_wakeup(void *ignore, struct task_struct *wakee)
  24581. {
  24582. if (unlikely(!sched_ref))
  24583. return;
  24584. diff -Nur linux-4.1.39.orig/kernel/trace/trace_sched_wakeup.c linux-4.1.39/kernel/trace/trace_sched_wakeup.c
  24585. --- linux-4.1.39.orig/kernel/trace/trace_sched_wakeup.c 2017-03-13 21:04:36.000000000 +0100
  24586. +++ linux-4.1.39/kernel/trace/trace_sched_wakeup.c 2017-04-18 17:56:30.645398372 +0200
  24587. @@ -514,7 +514,7 @@
  24588. }
  24589. static void
  24590. -probe_wakeup(void *ignore, struct task_struct *p, int success)
  24591. +probe_wakeup(void *ignore, struct task_struct *p)
  24592. {
  24593. struct trace_array_cpu *data;
  24594. int cpu = smp_processor_id();
  24595. diff -Nur linux-4.1.39.orig/kernel/user.c linux-4.1.39/kernel/user.c
  24596. --- linux-4.1.39.orig/kernel/user.c 2017-03-13 21:04:36.000000000 +0100
  24597. +++ linux-4.1.39/kernel/user.c 2017-04-18 17:56:30.645398372 +0200
  24598. @@ -161,11 +161,11 @@
  24599. if (!up)
  24600. return;
  24601. - local_irq_save(flags);
  24602. + local_irq_save_nort(flags);
  24603. if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
  24604. free_user(up, flags);
  24605. else
  24606. - local_irq_restore(flags);
  24607. + local_irq_restore_nort(flags);
  24608. }
  24609. struct user_struct *alloc_uid(kuid_t uid)
  24610. diff -Nur linux-4.1.39.orig/kernel/watchdog.c linux-4.1.39/kernel/watchdog.c
  24611. --- linux-4.1.39.orig/kernel/watchdog.c 2017-03-13 21:04:36.000000000 +0100
  24612. +++ linux-4.1.39/kernel/watchdog.c 2017-04-18 17:56:30.645398372 +0200
  24613. @@ -262,6 +262,8 @@
  24614. #ifdef CONFIG_HARDLOCKUP_DETECTOR
  24615. +static DEFINE_RAW_SPINLOCK(watchdog_output_lock);
  24616. +
  24617. static struct perf_event_attr wd_hw_attr = {
  24618. .type = PERF_TYPE_HARDWARE,
  24619. .config = PERF_COUNT_HW_CPU_CYCLES,
  24620. @@ -295,13 +297,21 @@
  24621. /* only print hardlockups once */
  24622. if (__this_cpu_read(hard_watchdog_warn) == true)
  24623. return;
  24624. + /*
  24625. + * If early-printk is enabled then make sure we do not
  24626. + * lock up in printk() and kill console logging:
  24627. + */
  24628. + printk_kill();
  24629. - if (hardlockup_panic)
  24630. + if (hardlockup_panic) {
  24631. panic("Watchdog detected hard LOCKUP on cpu %d",
  24632. this_cpu);
  24633. - else
  24634. + } else {
  24635. + raw_spin_lock(&watchdog_output_lock);
  24636. WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
  24637. this_cpu);
  24638. + raw_spin_unlock(&watchdog_output_lock);
  24639. + }
  24640. __this_cpu_write(hard_watchdog_warn, true);
  24641. return;
  24642. @@ -444,6 +454,7 @@
  24643. /* kick off the timer for the hardlockup detector */
  24644. hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  24645. hrtimer->function = watchdog_timer_fn;
  24646. + hrtimer->irqsafe = 1;
  24647. /* Enable the perf event */
  24648. watchdog_nmi_enable(cpu);
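The watchdog changes do two things: hard-lockup warnings from several CPUs are serialized behind watchdog_output_lock so their multi-line backtraces do not interleave, and the watchdog hrtimer is flagged irqsafe so it keeps firing from hard interrupt context on RT. The serialization half in a plain pthread sketch, not part of the patch, with a mutex standing in for the raw spinlock:

/* warnlock_sketch.c -- illustrative only; not part of the patch.
 * Build: cc -pthread -o warnlock_sketch warnlock_sketch.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t output_lock = PTHREAD_MUTEX_INITIALIZER;

static void report_lockup(int cpu)
{
	/* Multi-line diagnostic; without the lock, lines from different
	 * "CPUs" could interleave in the output. */
	pthread_mutex_lock(&output_lock);
	printf("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
	printf("  ... backtrace for cpu %d ...\n", cpu);
	pthread_mutex_unlock(&output_lock);
}

static void *checker(void *arg)
{
	report_lockup((int)(long)arg);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, checker, (void *)i);
	for (long i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The kernel uses a raw spinlock here, presumably because this path runs from NMI context, where RT's sleeping spinlocks cannot be taken.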
  24649. diff -Nur linux-4.1.39.orig/kernel/workqueue.c linux-4.1.39/kernel/workqueue.c
  24650. --- linux-4.1.39.orig/kernel/workqueue.c 2017-03-13 21:04:36.000000000 +0100
  24651. +++ linux-4.1.39/kernel/workqueue.c 2017-04-18 17:56:30.645398372 +0200
  24652. @@ -48,6 +48,8 @@
  24653. #include <linux/nodemask.h>
  24654. #include <linux/moduleparam.h>
  24655. #include <linux/uaccess.h>
  24656. +#include <linux/locallock.h>
  24657. +#include <linux/delay.h>
  24658. #include "workqueue_internal.h"
  24659. @@ -121,11 +123,16 @@
  24660. * cpu or grabbing pool->lock is enough for read access. If
  24661. * POOL_DISASSOCIATED is set, it's identical to L.
  24662. *
  24663. + * On RT we need the extra protection via rt_lock_idle_list() for
  24664. + * the list manipulations against read access from
  24665. + * wq_worker_sleeping(). All other places are nicely serialized via
  24666. + * pool->lock.
  24667. + *
  24668. * A: pool->attach_mutex protected.
  24669. *
  24670. * PL: wq_pool_mutex protected.
  24671. *
  24672. - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
  24673. + * PR: wq_pool_mutex protected for writes. RCU protected for reads.
  24674. *
  24675. * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
  24676. *
  24677. @@ -134,7 +141,7 @@
  24678. *
  24679. * WQ: wq->mutex protected.
  24680. *
  24681. - * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
  24682. + * WR: wq->mutex protected for writes. RCU protected for reads.
  24683. *
  24684. * MD: wq_mayday_lock protected.
  24685. */
  24686. @@ -183,7 +190,7 @@
  24687. atomic_t nr_running ____cacheline_aligned_in_smp;
  24688. /*
  24689. - * Destruction of pool is sched-RCU protected to allow dereferences
  24690. + * Destruction of pool is RCU protected to allow dereferences
  24691. * from get_work_pool().
  24692. */
  24693. struct rcu_head rcu;
  24694. @@ -212,7 +219,7 @@
  24695. /*
  24696. * Release of unbound pwq is punted to system_wq. See put_pwq()
  24697. * and pwq_unbound_release_workfn() for details. pool_workqueue
  24698. - * itself is also sched-RCU protected so that the first pwq can be
  24699. + * itself is also RCU protected so that the first pwq can be
  24700. * determined without grabbing wq->mutex.
  24701. */
  24702. struct work_struct unbound_release_work;
  24703. @@ -334,6 +341,8 @@
  24704. struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
  24705. EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
  24706. +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock);
  24707. +
  24708. static int worker_thread(void *__worker);
  24709. static void copy_workqueue_attrs(struct workqueue_attrs *to,
  24710. const struct workqueue_attrs *from);
  24711. @@ -343,14 +352,14 @@
  24712. #include <trace/events/workqueue.h>
  24713. #define assert_rcu_or_pool_mutex() \
  24714. - rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  24715. + rcu_lockdep_assert(rcu_read_lock_held() || \
  24716. lockdep_is_held(&wq_pool_mutex), \
  24717. - "sched RCU or wq_pool_mutex should be held")
  24718. + "RCU or wq_pool_mutex should be held")
  24719. #define assert_rcu_or_wq_mutex(wq) \
  24720. - rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  24721. + rcu_lockdep_assert(rcu_read_lock_held() || \
  24722. lockdep_is_held(&wq->mutex), \
  24723. - "sched RCU or wq->mutex should be held")
  24724. + "RCU or wq->mutex should be held")
  24725. #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
  24726. rcu_lockdep_assert(rcu_read_lock_sched_held() || \
  24727. @@ -368,7 +377,7 @@
  24728. * @pool: iteration cursor
  24729. * @pi: integer used for iteration
  24730. *
  24731. - * This must be called either with wq_pool_mutex held or sched RCU read
  24732. + * This must be called either with wq_pool_mutex held or RCU read
  24733. * locked. If the pool needs to be used beyond the locking in effect, the
  24734. * caller is responsible for guaranteeing that the pool stays online.
  24735. *
  24736. @@ -400,7 +409,7 @@
  24737. * @pwq: iteration cursor
  24738. * @wq: the target workqueue
  24739. *
  24740. - * This must be called either with wq->mutex held or sched RCU read locked.
  24741. + * This must be called either with wq->mutex held or RCU read locked.
  24742. * If the pwq needs to be used beyond the locking in effect, the caller is
  24743. * responsible for guaranteeing that the pwq stays online.
  24744. *
  24745. @@ -412,6 +421,31 @@
  24746. if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
  24747. else
  24748. +#ifdef CONFIG_PREEMPT_RT_BASE
  24749. +static inline void rt_lock_idle_list(struct worker_pool *pool)
  24750. +{
  24751. + preempt_disable();
  24752. +}
  24753. +static inline void rt_unlock_idle_list(struct worker_pool *pool)
  24754. +{
  24755. + preempt_enable();
  24756. +}
  24757. +static inline void sched_lock_idle_list(struct worker_pool *pool) { }
  24758. +static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
  24759. +#else
  24760. +static inline void rt_lock_idle_list(struct worker_pool *pool) { }
  24761. +static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
  24762. +static inline void sched_lock_idle_list(struct worker_pool *pool)
  24763. +{
  24764. + spin_lock_irq(&pool->lock);
  24765. +}
  24766. +static inline void sched_unlock_idle_list(struct worker_pool *pool)
  24767. +{
  24768. + spin_unlock_irq(&pool->lock);
  24769. +}
  24770. +#endif
  24771. +
  24772. +
  24773. #ifdef CONFIG_DEBUG_OBJECTS_WORK
  24774. static struct debug_obj_descr work_debug_descr;
  24775. @@ -562,8 +596,7 @@
  24776. * @wq: the target workqueue
  24777. * @node: the node ID
  24778. *
  24779. - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
  24780. - * read locked.
  24781. + * This must be called with any of wq_pool_mutex, wq->mutex or RCU read locked.
  24782. * If the pwq needs to be used beyond the locking in effect, the caller is
  24783. * responsible for guaranteeing that the pwq stays online.
  24784. *
  24785. @@ -706,8 +739,8 @@
  24786. * @work: the work item of interest
  24787. *
  24788. * Pools are created and destroyed under wq_pool_mutex, and allows read
  24789. - * access under sched-RCU read lock. As such, this function should be
  24790. - * called under wq_pool_mutex or with preemption disabled.
  24791. + * access under RCU read lock. As such, this function should be
  24792. + * called under wq_pool_mutex or inside of a rcu_read_lock() region.
  24793. *
  24794. * All fields of the returned pool are accessible as long as the above
  24795. * mentioned locking is in effect. If the returned pool needs to be used
  24796. @@ -844,51 +877,44 @@
  24797. */
  24798. static void wake_up_worker(struct worker_pool *pool)
  24799. {
  24800. - struct worker *worker = first_idle_worker(pool);
  24801. + struct worker *worker;
  24802. +
  24803. + rt_lock_idle_list(pool);
  24804. +
  24805. + worker = first_idle_worker(pool);
  24806. if (likely(worker))
  24807. wake_up_process(worker->task);
  24808. +
  24809. + rt_unlock_idle_list(pool);
  24810. }
  24811. /**
  24812. - * wq_worker_waking_up - a worker is waking up
  24813. - * @task: task waking up
  24814. - * @cpu: CPU @task is waking up to
  24815. - *
  24816. - * This function is called during try_to_wake_up() when a worker is
  24817. - * being awoken.
  24818. + * wq_worker_running - a worker is running again
  24819. + * @task: task returning from sleep
  24820. *
  24821. - * CONTEXT:
  24822. - * spin_lock_irq(rq->lock)
  24823. + * This function is called when a worker returns from schedule()
  24824. */
  24825. -void wq_worker_waking_up(struct task_struct *task, int cpu)
  24826. +void wq_worker_running(struct task_struct *task)
  24827. {
  24828. struct worker *worker = kthread_data(task);
  24829. - if (!(worker->flags & WORKER_NOT_RUNNING)) {
  24830. - WARN_ON_ONCE(worker->pool->cpu != cpu);
  24831. + if (!worker->sleeping)
  24832. + return;
  24833. + if (!(worker->flags & WORKER_NOT_RUNNING))
  24834. atomic_inc(&worker->pool->nr_running);
  24835. - }
  24836. + worker->sleeping = 0;
  24837. }
  24838. /**
  24839. * wq_worker_sleeping - a worker is going to sleep
  24840. * @task: task going to sleep
  24841. - * @cpu: CPU in question, must be the current CPU number
  24842. - *
  24843. - * This function is called during schedule() when a busy worker is
  24844. - * going to sleep. Worker on the same cpu can be woken up by
  24845. - * returning pointer to its task.
  24846. - *
  24847. - * CONTEXT:
  24848. - * spin_lock_irq(rq->lock)
  24849. - *
  24850. - * Return:
  24851. - * Worker task on @cpu to wake up, %NULL if none.
  24852. + * This function is called from schedule() when a busy worker is
  24853. + * going to sleep.
  24854. */
  24855. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
  24856. +void wq_worker_sleeping(struct task_struct *task)
  24857. {
  24858. - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
  24859. + struct worker *worker = kthread_data(task);
  24860. struct worker_pool *pool;
  24861. /*
  24862. @@ -897,29 +923,26 @@
  24863. * checking NOT_RUNNING.
  24864. */
  24865. if (worker->flags & WORKER_NOT_RUNNING)
  24866. - return NULL;
  24867. + return;
  24868. pool = worker->pool;
  24869. - /* this can only happen on the local cpu */
  24870. - if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
  24871. - return NULL;
  24872. + if (WARN_ON_ONCE(worker->sleeping))
  24873. + return;
  24874. +
  24875. + worker->sleeping = 1;
  24876. /*
  24877. * The counterpart of the following dec_and_test, implied mb,
  24878. * worklist not empty test sequence is in insert_work().
  24879. * Please read comment there.
  24880. - *
  24881. - * NOT_RUNNING is clear. This means that we're bound to and
  24882. - * running on the local cpu w/ rq lock held and preemption
  24883. - * disabled, which in turn means that none else could be
  24884. - * manipulating idle_list, so dereferencing idle_list without pool
  24885. - * lock is safe.
  24886. */
  24887. if (atomic_dec_and_test(&pool->nr_running) &&
  24888. - !list_empty(&pool->worklist))
  24889. - to_wakeup = first_idle_worker(pool);
  24890. - return to_wakeup ? to_wakeup->task : NULL;
  24891. + !list_empty(&pool->worklist)) {
  24892. + sched_lock_idle_list(pool);
  24893. + wake_up_worker(pool);
  24894. + sched_unlock_idle_list(pool);
  24895. + }
  24896. }
  24897. /**
  24898. @@ -1113,12 +1136,14 @@
  24899. {
  24900. if (pwq) {
  24901. /*
  24902. - * As both pwqs and pools are sched-RCU protected, the
  24903. + * As both pwqs and pools are RCU protected, the
  24904. * following lock operations are safe.
  24905. */
  24906. - spin_lock_irq(&pwq->pool->lock);
  24907. + rcu_read_lock();
  24908. + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock);
  24909. put_pwq(pwq);
  24910. - spin_unlock_irq(&pwq->pool->lock);
  24911. + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock);
  24912. + rcu_read_unlock();
  24913. }
  24914. }
  24915. @@ -1220,7 +1245,7 @@
  24916. struct worker_pool *pool;
  24917. struct pool_workqueue *pwq;
  24918. - local_irq_save(*flags);
  24919. + local_lock_irqsave(pendingb_lock, *flags);
  24920. /* try to steal the timer if it exists */
  24921. if (is_dwork) {
  24922. @@ -1239,6 +1264,7 @@
  24923. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
  24924. return 0;
  24925. + rcu_read_lock();
  24926. /*
  24927. * The queueing is in progress, or it is already queued. Try to
  24928. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
  24929. @@ -1277,14 +1303,16 @@
  24930. set_work_pool_and_keep_pending(work, pool->id);
  24931. spin_unlock(&pool->lock);
  24932. + rcu_read_unlock();
  24933. return 1;
  24934. }
  24935. spin_unlock(&pool->lock);
  24936. fail:
  24937. - local_irq_restore(*flags);
  24938. + rcu_read_unlock();
  24939. + local_unlock_irqrestore(pendingb_lock, *flags);
  24940. if (work_is_canceling(work))
  24941. return -ENOENT;
  24942. - cpu_relax();
  24943. + cpu_chill();
  24944. return -EAGAIN;
  24945. }
  24946. @@ -1353,7 +1381,7 @@
  24947. * queued or lose PENDING. Grabbing PENDING and queueing should
  24948. * happen with IRQ disabled.
  24949. */
  24950. - WARN_ON_ONCE(!irqs_disabled());
  24951. + WARN_ON_ONCE_NONRT(!irqs_disabled());
  24952. debug_work_activate(work);
  24953. @@ -1361,6 +1389,8 @@
  24954. if (unlikely(wq->flags & __WQ_DRAINING) &&
  24955. WARN_ON_ONCE(!is_chained_work(wq)))
  24956. return;
  24957. +
  24958. + rcu_read_lock();
  24959. retry:
  24960. if (req_cpu == WORK_CPU_UNBOUND)
  24961. cpu = raw_smp_processor_id();
  24962. @@ -1417,10 +1447,8 @@
  24963. /* pwq determined, queue */
  24964. trace_workqueue_queue_work(req_cpu, pwq, work);
  24965. - if (WARN_ON(!list_empty(&work->entry))) {
  24966. - spin_unlock(&pwq->pool->lock);
  24967. - return;
  24968. - }
  24969. + if (WARN_ON(!list_empty(&work->entry)))
  24970. + goto out;
  24971. pwq->nr_in_flight[pwq->work_color]++;
  24972. work_flags = work_color_to_flags(pwq->work_color);
  24973. @@ -1436,7 +1464,9 @@
  24974. insert_work(pwq, work, worklist, work_flags);
  24975. +out:
  24976. spin_unlock(&pwq->pool->lock);
  24977. + rcu_read_unlock();
  24978. }
  24979. /**
  24980. @@ -1456,14 +1486,14 @@
  24981. bool ret = false;
  24982. unsigned long flags;
  24983. - local_irq_save(flags);
  24984. + local_lock_irqsave(pendingb_lock,flags);
  24985. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  24986. __queue_work(cpu, wq, work);
  24987. ret = true;
  24988. }
  24989. - local_irq_restore(flags);
  24990. + local_unlock_irqrestore(pendingb_lock, flags);
  24991. return ret;
  24992. }
  24993. EXPORT_SYMBOL(queue_work_on);
  24994. @@ -1530,14 +1560,14 @@
  24995. unsigned long flags;
  24996. /* read the comment in __queue_work() */
  24997. - local_irq_save(flags);
  24998. + local_lock_irqsave(pendingb_lock, flags);
  24999. if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
  25000. __queue_delayed_work(cpu, wq, dwork, delay);
  25001. ret = true;
  25002. }
  25003. - local_irq_restore(flags);
  25004. + local_unlock_irqrestore(pendingb_lock, flags);
  25005. return ret;
  25006. }
  25007. EXPORT_SYMBOL(queue_delayed_work_on);
  25008. @@ -1572,7 +1602,7 @@
  25009. if (likely(ret >= 0)) {
  25010. __queue_delayed_work(cpu, wq, dwork, delay);
  25011. - local_irq_restore(flags);
  25012. + local_unlock_irqrestore(pendingb_lock, flags);
  25013. }
  25014. /* -ENOENT from try_to_grab_pending() becomes %true */
  25015. @@ -1605,7 +1635,9 @@
  25016. worker->last_active = jiffies;
  25017. /* idle_list is LIFO */
  25018. + rt_lock_idle_list(pool);
  25019. list_add(&worker->entry, &pool->idle_list);
  25020. + rt_unlock_idle_list(pool);
  25021. if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
  25022. mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
  25023. @@ -1638,7 +1670,9 @@
  25024. return;
  25025. worker_clr_flags(worker, WORKER_IDLE);
  25026. pool->nr_idle--;
  25027. + rt_lock_idle_list(pool);
  25028. list_del_init(&worker->entry);
  25029. + rt_unlock_idle_list(pool);
  25030. }
  25031. static struct worker *alloc_worker(int node)
  25032. @@ -1806,7 +1840,9 @@
  25033. pool->nr_workers--;
  25034. pool->nr_idle--;
  25035. + rt_lock_idle_list(pool);
  25036. list_del_init(&worker->entry);
  25037. + rt_unlock_idle_list(pool);
  25038. worker->flags |= WORKER_DIE;
  25039. wake_up_process(worker->task);
  25040. }
  25041. @@ -2723,14 +2759,14 @@
  25042. might_sleep();
  25043. - local_irq_disable();
  25044. + rcu_read_lock();
  25045. pool = get_work_pool(work);
  25046. if (!pool) {
  25047. - local_irq_enable();
  25048. + rcu_read_unlock();
  25049. return false;
  25050. }
  25051. - spin_lock(&pool->lock);
  25052. + spin_lock_irq(&pool->lock);
  25053. /* see the comment in try_to_grab_pending() with the same code */
  25054. pwq = get_work_pwq(work);
  25055. if (pwq) {
  25056. @@ -2757,10 +2793,11 @@
  25057. else
  25058. lock_map_acquire_read(&pwq->wq->lockdep_map);
  25059. lock_map_release(&pwq->wq->lockdep_map);
  25060. -
  25061. + rcu_read_unlock();
  25062. return true;
  25063. already_gone:
  25064. spin_unlock_irq(&pool->lock);
  25065. + rcu_read_unlock();
  25066. return false;
  25067. }
  25068. @@ -2847,7 +2884,7 @@
  25069. /* tell other tasks trying to grab @work to back off */
  25070. mark_work_canceling(work);
  25071. - local_irq_restore(flags);
  25072. + local_unlock_irqrestore(pendingb_lock, flags);
  25073. flush_work(work);
  25074. clear_work_data(work);
  25075. @@ -2902,10 +2939,10 @@
  25076. */
  25077. bool flush_delayed_work(struct delayed_work *dwork)
  25078. {
  25079. - local_irq_disable();
  25080. + local_lock_irq(pendingb_lock);
  25081. if (del_timer_sync(&dwork->timer))
  25082. __queue_work(dwork->cpu, dwork->wq, &dwork->work);
  25083. - local_irq_enable();
  25084. + local_unlock_irq(pendingb_lock);
  25085. return flush_work(&dwork->work);
  25086. }
  25087. EXPORT_SYMBOL(flush_delayed_work);
  25088. @@ -2940,7 +2977,7 @@
  25089. set_work_pool_and_clear_pending(&dwork->work,
  25090. get_work_pool_id(&dwork->work));
  25091. - local_irq_restore(flags);
  25092. + local_unlock_irqrestore(pendingb_lock, flags);
  25093. return ret;
  25094. }
  25095. EXPORT_SYMBOL(cancel_delayed_work);
  25096. @@ -3198,7 +3235,7 @@
  25097. * put_unbound_pool - put a worker_pool
  25098. * @pool: worker_pool to put
  25099. *
  25100. - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
  25101. + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
  25102. * safe manner. get_unbound_pool() calls this function on its failure path
  25103. * and this function should be able to release pools which went through,
  25104. * successfully or not, init_worker_pool().
  25105. @@ -3252,8 +3289,8 @@
  25106. del_timer_sync(&pool->idle_timer);
  25107. del_timer_sync(&pool->mayday_timer);
  25108. - /* sched-RCU protected to allow dereferences from get_work_pool() */
  25109. - call_rcu_sched(&pool->rcu, rcu_free_pool);
  25110. + /* RCU protected to allow dereferences from get_work_pool() */
  25111. + call_rcu(&pool->rcu, rcu_free_pool);
  25112. }
  25113. /**
  25114. @@ -3358,14 +3395,14 @@
  25115. put_unbound_pool(pool);
  25116. mutex_unlock(&wq_pool_mutex);
  25117. - call_rcu_sched(&pwq->rcu, rcu_free_pwq);
  25118. + call_rcu(&pwq->rcu, rcu_free_pwq);
  25119. /*
  25120. * If we're the last pwq going away, @wq is already dead and no one
  25121. * is gonna access it anymore. Schedule RCU free.
  25122. */
  25123. if (is_last)
  25124. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  25125. + call_rcu(&wq->rcu, rcu_free_wq);
  25126. }
  25127. /**
  25128. @@ -4003,7 +4040,7 @@
  25129. * The base ref is never dropped on per-cpu pwqs. Directly
  25130. * schedule RCU free.
  25131. */
  25132. - call_rcu_sched(&wq->rcu, rcu_free_wq);
  25133. + call_rcu(&wq->rcu, rcu_free_wq);
  25134. } else {
  25135. /*
  25136. * We're the sole accessor of @wq at this point. Directly
  25137. @@ -4096,7 +4133,8 @@
  25138. struct pool_workqueue *pwq;
  25139. bool ret;
  25140. - rcu_read_lock_sched();
  25141. + rcu_read_lock();
  25142. + preempt_disable();
  25143. if (cpu == WORK_CPU_UNBOUND)
  25144. cpu = smp_processor_id();
  25145. @@ -4107,7 +4145,8 @@
  25146. pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
  25147. ret = !list_empty(&pwq->delayed_works);
  25148. - rcu_read_unlock_sched();
  25149. + preempt_enable();
  25150. + rcu_read_unlock();
  25151. return ret;
  25152. }
  25153. @@ -4133,15 +4172,15 @@
  25154. if (work_pending(work))
  25155. ret |= WORK_BUSY_PENDING;
  25156. - local_irq_save(flags);
  25157. + rcu_read_lock();
  25158. pool = get_work_pool(work);
  25159. if (pool) {
  25160. - spin_lock(&pool->lock);
  25161. + spin_lock_irqsave(&pool->lock, flags);
  25162. if (find_worker_executing_work(pool, work))
  25163. ret |= WORK_BUSY_RUNNING;
  25164. - spin_unlock(&pool->lock);
  25165. + spin_unlock_irqrestore(&pool->lock, flags);
  25166. }
  25167. - local_irq_restore(flags);
  25168. + rcu_read_unlock();
  25169. return ret;
  25170. }
  25171. @@ -4330,7 +4369,7 @@
  25172. unsigned long flags;
  25173. int pi;
  25174. - rcu_read_lock_sched();
  25175. + rcu_read_lock();
  25176. pr_info("Showing busy workqueues and worker pools:\n");
  25177. @@ -4381,7 +4420,7 @@
  25178. spin_unlock_irqrestore(&pool->lock, flags);
  25179. }
  25180. - rcu_read_unlock_sched();
  25181. + rcu_read_unlock();
  25182. }
  25183. /*
  25184. @@ -4742,16 +4781,16 @@
  25185. * nr_active is monotonically decreasing. It's safe
  25186. * to peek without lock.
  25187. */
  25188. - rcu_read_lock_sched();
  25189. + rcu_read_lock();
  25190. for_each_pwq(pwq, wq) {
  25191. WARN_ON_ONCE(pwq->nr_active < 0);
  25192. if (pwq->nr_active) {
  25193. busy = true;
  25194. - rcu_read_unlock_sched();
  25195. + rcu_read_unlock();
  25196. goto out_unlock;
  25197. }
  25198. }
  25199. - rcu_read_unlock_sched();
  25200. + rcu_read_unlock();
  25201. }
  25202. out_unlock:
  25203. mutex_unlock(&wq_pool_mutex);
  25204. @@ -4865,7 +4904,8 @@
  25205. const char *delim = "";
  25206. int node, written = 0;
  25207. - rcu_read_lock_sched();
  25208. + get_online_cpus();
  25209. + rcu_read_lock();
  25210. for_each_node(node) {
  25211. written += scnprintf(buf + written, PAGE_SIZE - written,
  25212. "%s%d:%d", delim, node,
  25213. @@ -4873,7 +4913,8 @@
  25214. delim = " ";
  25215. }
  25216. written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
  25217. - rcu_read_unlock_sched();
  25218. + rcu_read_unlock();
  25219. + put_online_cpus();
  25220. return written;
  25221. }
  25222. diff -Nur linux-4.1.39.orig/kernel/workqueue_internal.h linux-4.1.39/kernel/workqueue_internal.h
  25223. --- linux-4.1.39.orig/kernel/workqueue_internal.h 2017-03-13 21:04:36.000000000 +0100
  25224. +++ linux-4.1.39/kernel/workqueue_internal.h 2017-04-18 17:56:30.645398372 +0200
  25225. @@ -43,6 +43,7 @@
  25226. unsigned long last_active; /* L: last active timestamp */
  25227. unsigned int flags; /* X: flags */
  25228. int id; /* I: worker id */
  25229. + int sleeping; /* None */
  25230. /*
  25231. * Opaque string set with work_set_desc(). Printed out with task
  25232. @@ -68,7 +69,7 @@
  25233. * Scheduler hooks for concurrency managed workqueue. Only to be used from
  25234. * sched/core.c and workqueue.c.
  25235. */
  25236. -void wq_worker_waking_up(struct task_struct *task, int cpu);
  25237. -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
  25238. +void wq_worker_running(struct task_struct *task);
  25239. +void wq_worker_sleeping(struct task_struct *task);
  25240. #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
  25241. diff -Nur linux-4.1.39.orig/lib/debugobjects.c linux-4.1.39/lib/debugobjects.c
  25242. --- linux-4.1.39.orig/lib/debugobjects.c 2017-03-13 21:04:36.000000000 +0100
  25243. +++ linux-4.1.39/lib/debugobjects.c 2017-04-18 17:56:30.645398372 +0200
  25244. @@ -309,7 +309,10 @@
  25245. struct debug_obj *obj;
  25246. unsigned long flags;
  25247. - fill_pool();
  25248. +#ifdef CONFIG_PREEMPT_RT_FULL
  25249. + if (preempt_count() == 0 && !irqs_disabled())
  25250. +#endif
  25251. + fill_pool();
  25252. db = get_bucket((unsigned long) addr);
  25253. diff -Nur linux-4.1.39.orig/lib/dump_stack.c linux-4.1.39/lib/dump_stack.c
  25254. --- linux-4.1.39.orig/lib/dump_stack.c 2017-03-13 21:04:36.000000000 +0100
  25255. +++ linux-4.1.39/lib/dump_stack.c 2017-04-18 17:56:30.645398372 +0200
  25256. @@ -8,6 +8,7 @@
  25257. #include <linux/sched.h>
  25258. #include <linux/smp.h>
  25259. #include <linux/atomic.h>
  25260. +#include <linux/locallock.h>
  25261. static void __dump_stack(void)
  25262. {
  25263. diff -Nur linux-4.1.39.orig/lib/idr.c linux-4.1.39/lib/idr.c
  25264. --- linux-4.1.39.orig/lib/idr.c 2017-03-13 21:04:36.000000000 +0100
  25265. +++ linux-4.1.39/lib/idr.c 2017-04-18 17:56:30.645398372 +0200
  25266. @@ -30,6 +30,7 @@
  25267. #include <linux/idr.h>
  25268. #include <linux/spinlock.h>
  25269. #include <linux/percpu.h>
  25270. +#include <linux/locallock.h>
  25271. #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
  25272. #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
  25273. @@ -366,6 +367,35 @@
  25274. idr_mark_full(pa, id);
  25275. }
  25276. +#ifdef CONFIG_PREEMPT_RT_FULL
  25277. +static DEFINE_LOCAL_IRQ_LOCK(idr_lock);
  25278. +
  25279. +static inline void idr_preload_lock(void)
  25280. +{
  25281. + local_lock(idr_lock);
  25282. +}
  25283. +
  25284. +static inline void idr_preload_unlock(void)
  25285. +{
  25286. + local_unlock(idr_lock);
  25287. +}
  25288. +
  25289. +void idr_preload_end(void)
  25290. +{
  25291. + idr_preload_unlock();
  25292. +}
  25293. +EXPORT_SYMBOL(idr_preload_end);
  25294. +#else
  25295. +static inline void idr_preload_lock(void)
  25296. +{
  25297. + preempt_disable();
  25298. +}
  25299. +
  25300. +static inline void idr_preload_unlock(void)
  25301. +{
  25302. + preempt_enable();
  25303. +}
  25304. +#endif
  25305. /**
  25306. * idr_preload - preload for idr_alloc()
  25307. @@ -401,7 +431,7 @@
  25308. WARN_ON_ONCE(in_interrupt());
  25309. might_sleep_if(gfp_mask & __GFP_WAIT);
  25310. - preempt_disable();
  25311. + idr_preload_lock();
  25312. /*
  25313. * idr_alloc() is likely to succeed w/o full idr_layer buffer and
  25314. @@ -413,9 +443,9 @@
  25315. while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
  25316. struct idr_layer *new;
  25317. - preempt_enable();
  25318. + idr_preload_unlock();
  25319. new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
  25320. - preempt_disable();
  25321. + idr_preload_lock();
  25322. if (!new)
  25323. break;
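The pattern in idr_preload() is: take the (now potentially sleepable) local lock, and whenever the per-CPU cache needs refilling, drop the lock, allocate with the caller's gfp mask, then retake the lock and continue. A generic userspace rendering of that "allocate outside the lock, recheck after relocking" loop follows; it is not part of the patch, the cache layout and refill threshold are placeholders, and the per-CPU aspect is dropped (hence the extra recheck).

/* preload_sketch.c -- illustrative only; not part of the patch.
 * Build: cc -pthread -o preload_sketch preload_sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define CACHE_MAX 8                 /* placeholder refill threshold */

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static void *cache[CACHE_MAX];
static int cache_cnt;

/* cf. idr_preload(): refill the cache before entering the critical path */
static void preload(void)
{
	pthread_mutex_lock(&cache_lock);          /* idr_preload_lock() */
	while (cache_cnt < CACHE_MAX) {
		void *new;

		pthread_mutex_unlock(&cache_lock); /* idr_preload_unlock() */
		new = malloc(64);                  /* may sleep / fail */
		pthread_mutex_lock(&cache_lock);
		if (!new)
			break;
		/* recheck: someone else may have refilled meanwhile */
		if (cache_cnt < CACHE_MAX)
			cache[cache_cnt++] = new;
		else
			free(new);
	}
	/* return with the lock held, as idr_preload() does */
}

static void preload_end(void)
{
	pthread_mutex_unlock(&cache_lock);         /* idr_preload_end() */
}

int main(void)
{
	preload();
	printf("cached objects: %d\n", cache_cnt);
	preload_end();
	while (cache_cnt)
		free(cache[--cache_cnt]);
	return 0;
}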
  25324. diff -Nur linux-4.1.39.orig/lib/Kconfig linux-4.1.39/lib/Kconfig
  25325. --- linux-4.1.39.orig/lib/Kconfig 2017-03-13 21:04:36.000000000 +0100
  25326. +++ linux-4.1.39/lib/Kconfig 2017-04-18 17:56:30.645398372 +0200
  25327. @@ -391,6 +391,7 @@
  25328. config CPUMASK_OFFSTACK
  25329. bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
  25330. + depends on !PREEMPT_RT_FULL
  25331. help
  25332. Use dynamic allocation for cpumask_var_t, instead of putting
  25333. them on the stack. This is a bit more expensive, but avoids
  25334. diff -Nur linux-4.1.39.orig/lib/locking-selftest.c linux-4.1.39/lib/locking-selftest.c
  25335. --- linux-4.1.39.orig/lib/locking-selftest.c 2017-03-13 21:04:36.000000000 +0100
  25336. +++ linux-4.1.39/lib/locking-selftest.c 2017-04-18 17:56:30.645398372 +0200
  25337. @@ -590,6 +590,8 @@
  25338. #include "locking-selftest-spin-hardirq.h"
  25339. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
  25340. +#ifndef CONFIG_PREEMPT_RT_FULL
  25341. +
  25342. #include "locking-selftest-rlock-hardirq.h"
  25343. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
  25344. @@ -605,9 +607,12 @@
  25345. #include "locking-selftest-wlock-softirq.h"
  25346. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
  25347. +#endif
  25348. +
  25349. #undef E1
  25350. #undef E2
  25351. +#ifndef CONFIG_PREEMPT_RT_FULL
  25352. /*
  25353. * Enabling hardirqs with a softirq-safe lock held:
  25354. */
  25355. @@ -640,6 +645,8 @@
  25356. #undef E1
  25357. #undef E2
  25358. +#endif
  25359. +
  25360. /*
  25361. * Enabling irqs with an irq-safe lock held:
  25362. */
  25363. @@ -663,6 +670,8 @@
  25364. #include "locking-selftest-spin-hardirq.h"
  25365. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
  25366. +#ifndef CONFIG_PREEMPT_RT_FULL
  25367. +
  25368. #include "locking-selftest-rlock-hardirq.h"
  25369. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
  25370. @@ -678,6 +687,8 @@
  25371. #include "locking-selftest-wlock-softirq.h"
  25372. GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
  25373. +#endif
  25374. +
  25375. #undef E1
  25376. #undef E2
  25377. @@ -709,6 +720,8 @@
  25378. #include "locking-selftest-spin-hardirq.h"
  25379. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
  25380. +#ifndef CONFIG_PREEMPT_RT_FULL
  25381. +
  25382. #include "locking-selftest-rlock-hardirq.h"
  25383. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
  25384. @@ -724,6 +737,8 @@
  25385. #include "locking-selftest-wlock-softirq.h"
  25386. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
  25387. +#endif
  25388. +
  25389. #undef E1
  25390. #undef E2
  25391. #undef E3
  25392. @@ -757,6 +772,8 @@
  25393. #include "locking-selftest-spin-hardirq.h"
  25394. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
  25395. +#ifndef CONFIG_PREEMPT_RT_FULL
  25396. +
  25397. #include "locking-selftest-rlock-hardirq.h"
  25398. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
  25399. @@ -772,10 +789,14 @@
  25400. #include "locking-selftest-wlock-softirq.h"
  25401. GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
  25402. +#endif
  25403. +
  25404. #undef E1
  25405. #undef E2
  25406. #undef E3
  25407. +#ifndef CONFIG_PREEMPT_RT_FULL
  25408. +
  25409. /*
  25410. * read-lock / write-lock irq inversion.
  25411. *
  25412. @@ -838,6 +859,10 @@
  25413. #undef E2
  25414. #undef E3
  25415. +#endif
  25416. +
  25417. +#ifndef CONFIG_PREEMPT_RT_FULL
  25418. +
  25419. /*
  25420. * read-lock / write-lock recursion that is actually safe.
  25421. */
  25422. @@ -876,6 +901,8 @@
  25423. #undef E2
  25424. #undef E3
  25425. +#endif
  25426. +
  25427. /*
  25428. * read-lock / write-lock recursion that is unsafe.
  25429. */
  25430. @@ -1858,6 +1885,7 @@
  25431. printk(" --------------------------------------------------------------------------\n");
  25432. +#ifndef CONFIG_PREEMPT_RT_FULL
  25433. /*
  25434. * irq-context testcases:
  25435. */
  25436. @@ -1870,6 +1898,28 @@
  25437. DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
  25438. // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
  25439. +#else
  25440. + /* On -rt, we only do hardirq context test for raw spinlock */
  25441. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12);
  25442. + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21);
  25443. +
  25444. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12);
  25445. + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21);
  25446. +
  25447. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123);
  25448. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132);
  25449. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213);
  25450. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231);
  25451. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312);
  25452. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321);
  25453. +
  25454. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123);
  25455. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132);
  25456. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213);
  25457. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231);
  25458. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312);
  25459. + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321);
  25460. +#endif
  25461. ww_tests();
  25462. diff -Nur linux-4.1.39.orig/lib/percpu_ida.c linux-4.1.39/lib/percpu_ida.c
  25463. --- linux-4.1.39.orig/lib/percpu_ida.c 2017-03-13 21:04:36.000000000 +0100
  25464. +++ linux-4.1.39/lib/percpu_ida.c 2017-04-18 17:56:30.645398372 +0200
  25465. @@ -26,6 +26,9 @@
  25466. #include <linux/string.h>
  25467. #include <linux/spinlock.h>
  25468. #include <linux/percpu_ida.h>
  25469. +#include <linux/locallock.h>
  25470. +
  25471. +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
  25472. struct percpu_ida_cpu {
  25473. /*
  25474. @@ -148,13 +151,13 @@
  25475. unsigned long flags;
  25476. int tag;
  25477. - local_irq_save(flags);
  25478. + local_lock_irqsave(irq_off_lock, flags);
  25479. tags = this_cpu_ptr(pool->tag_cpu);
  25480. /* Fastpath */
  25481. tag = alloc_local_tag(tags);
  25482. if (likely(tag >= 0)) {
  25483. - local_irq_restore(flags);
  25484. + local_unlock_irqrestore(irq_off_lock, flags);
  25485. return tag;
  25486. }
  25487. @@ -173,6 +176,7 @@
  25488. if (!tags->nr_free)
  25489. alloc_global_tags(pool, tags);
  25490. +
  25491. if (!tags->nr_free)
  25492. steal_tags(pool, tags);
  25493. @@ -184,7 +188,7 @@
  25494. }
  25495. spin_unlock(&pool->lock);
  25496. - local_irq_restore(flags);
  25497. + local_unlock_irqrestore(irq_off_lock, flags);
  25498. if (tag >= 0 || state == TASK_RUNNING)
  25499. break;
  25500. @@ -196,7 +200,7 @@
  25501. schedule();
  25502. - local_irq_save(flags);
  25503. + local_lock_irqsave(irq_off_lock, flags);
  25504. tags = this_cpu_ptr(pool->tag_cpu);
  25505. }
  25506. if (state != TASK_RUNNING)
  25507. @@ -221,7 +225,7 @@
  25508. BUG_ON(tag >= pool->nr_tags);
  25509. - local_irq_save(flags);
  25510. + local_lock_irqsave(irq_off_lock, flags);
  25511. tags = this_cpu_ptr(pool->tag_cpu);
  25512. spin_lock(&tags->lock);
  25513. @@ -253,7 +257,7 @@
  25514. spin_unlock(&pool->lock);
  25515. }
  25516. - local_irq_restore(flags);
  25517. + local_unlock_irqrestore(irq_off_lock, flags);
  25518. }
  25519. EXPORT_SYMBOL_GPL(percpu_ida_free);
  25520. @@ -345,7 +349,7 @@
  25521. struct percpu_ida_cpu *remote;
  25522. unsigned cpu, i, err = 0;
  25523. - local_irq_save(flags);
  25524. + local_lock_irqsave(irq_off_lock, flags);
  25525. for_each_possible_cpu(cpu) {
  25526. remote = per_cpu_ptr(pool->tag_cpu, cpu);
  25527. spin_lock(&remote->lock);
  25528. @@ -367,7 +371,7 @@
  25529. }
  25530. spin_unlock(&pool->lock);
  25531. out:
  25532. - local_irq_restore(flags);
  25533. + local_unlock_irqrestore(irq_off_lock, flags);
  25534. return err;
  25535. }
  25536. EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
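
The percpu_ida conversion above is the canonical shape of most hunks in this series: a DEFINE_LOCAL_IRQ_LOCK() plus local_lock_irqsave()/local_unlock_irqrestore() replaces bare local_irq_save()/local_irq_restore(), so on -rt the section is serialised by a per-CPU sleeping lock instead of by actually disabling interrupts, while on !PREEMPT_RT the locallock macros fall back to the original IRQ-off behaviour. Reduced to a stand-alone sketch (the counter and its names are invented):

#include <linux/locallock.h>
#include <linux/percpu.h>

static DEFINE_LOCAL_IRQ_LOCK(my_stats_lock);
static DEFINE_PER_CPU(unsigned long, my_stats);

static void my_stats_inc(void)
{
	unsigned long flags;

	local_lock_irqsave(my_stats_lock, flags);	/* was: local_irq_save(flags) */
	__this_cpu_inc(my_stats);
	local_unlock_irqrestore(my_stats_lock, flags);	/* was: local_irq_restore(flags) */
}
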
  25537. diff -Nur linux-4.1.39.orig/lib/radix-tree.c linux-4.1.39/lib/radix-tree.c
  25538. --- linux-4.1.39.orig/lib/radix-tree.c 2017-03-13 21:04:36.000000000 +0100
  25539. +++ linux-4.1.39/lib/radix-tree.c 2017-04-18 17:56:30.645398372 +0200
  25540. @@ -34,6 +34,7 @@
  25541. #include <linux/bitops.h>
  25542. #include <linux/rcupdate.h>
  25543. #include <linux/preempt_mask.h> /* in_interrupt() */
  25544. +#include <linux/locallock.h>
  25545. /*
  25546. @@ -68,6 +69,7 @@
  25547. struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE];
  25548. };
  25549. static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
  25550. +static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock);
  25551. static inline void *ptr_to_indirect(void *ptr)
  25552. {
  25553. @@ -195,12 +197,13 @@
  25554. * succeed in getting a node here (and never reach
  25555. * kmem_cache_alloc)
  25556. */
  25557. - rtp = this_cpu_ptr(&radix_tree_preloads);
  25558. + rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
  25559. if (rtp->nr) {
  25560. ret = rtp->nodes[rtp->nr - 1];
  25561. rtp->nodes[rtp->nr - 1] = NULL;
  25562. rtp->nr--;
  25563. }
  25564. + put_locked_var(radix_tree_preloads_lock, radix_tree_preloads);
  25565. /*
  25566. * Update the allocation stack trace as this is more useful
  25567. * for debugging.
  25568. @@ -255,14 +258,14 @@
  25569. struct radix_tree_node *node;
  25570. int ret = -ENOMEM;
  25571. - preempt_disable();
  25572. + local_lock(radix_tree_preloads_lock);
  25573. rtp = this_cpu_ptr(&radix_tree_preloads);
  25574. while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
  25575. - preempt_enable();
  25576. + local_unlock(radix_tree_preloads_lock);
  25577. node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  25578. if (node == NULL)
  25579. goto out;
  25580. - preempt_disable();
  25581. + local_lock(radix_tree_preloads_lock);
  25582. rtp = this_cpu_ptr(&radix_tree_preloads);
  25583. if (rtp->nr < ARRAY_SIZE(rtp->nodes))
  25584. rtp->nodes[rtp->nr++] = node;
  25585. @@ -301,11 +304,17 @@
  25586. if (gfp_mask & __GFP_WAIT)
  25587. return __radix_tree_preload(gfp_mask);
  25588. /* Preloading doesn't help anything with this gfp mask, skip it */
  25589. - preempt_disable();
  25590. + local_lock(radix_tree_preloads_lock);
  25591. return 0;
  25592. }
  25593. EXPORT_SYMBOL(radix_tree_maybe_preload);
  25594. +void radix_tree_preload_end(void)
  25595. +{
  25596. + local_unlock(radix_tree_preloads_lock);
  25597. +}
  25598. +EXPORT_SYMBOL(radix_tree_preload_end);
  25599. +
  25600. /*
  25601. * Return the maximum key which can be store into a
  25602. * radix tree with height HEIGHT.
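
In mainline 4.1 radix_tree_preload_end() is a static inline that just does preempt_enable(); the hunk above (together with the matching header change elsewhere in this patch) turns it into an exported function that drops radix_tree_preloads_lock, so existing preload users keep working unmodified. For reference, the usual pattern looks like this (tree, lock and index are illustrative only):

static RADIX_TREE(my_tree, GFP_KERNEL);
static DEFINE_SPINLOCK(my_tree_lock);

int my_tree_store(unsigned long index, void *item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* takes radix_tree_preloads_lock */
	if (err)
		return err;

	spin_lock(&my_tree_lock);
	err = radix_tree_insert(&my_tree, index, item);
	spin_unlock(&my_tree_lock);

	radix_tree_preload_end();		/* now out of line, drops the local lock */
	return err;
}
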
  25603. diff -Nur linux-4.1.39.orig/lib/scatterlist.c linux-4.1.39/lib/scatterlist.c
  25604. --- linux-4.1.39.orig/lib/scatterlist.c 2017-03-13 21:04:36.000000000 +0100
  25605. +++ linux-4.1.39/lib/scatterlist.c 2017-04-18 17:56:30.645398372 +0200
  25606. @@ -592,7 +592,7 @@
  25607. flush_kernel_dcache_page(miter->page);
  25608. if (miter->__flags & SG_MITER_ATOMIC) {
  25609. - WARN_ON_ONCE(preemptible());
  25610. + WARN_ON_ONCE(!pagefault_disabled());
  25611. kunmap_atomic(miter->addr);
  25612. } else
  25613. kunmap(miter->page);
  25614. @@ -637,7 +637,7 @@
  25615. if (!sg_miter_skip(&miter, skip))
  25616. return false;
  25617. - local_irq_save(flags);
  25618. + local_irq_save_nort(flags);
  25619. while (sg_miter_next(&miter) && offset < buflen) {
  25620. unsigned int len;
  25621. @@ -654,7 +654,7 @@
  25622. sg_miter_stop(&miter);
  25623. - local_irq_restore(flags);
  25624. + local_irq_restore_nort(flags);
  25625. return offset;
  25626. }
  25627. diff -Nur linux-4.1.39.orig/lib/smp_processor_id.c linux-4.1.39/lib/smp_processor_id.c
  25628. --- linux-4.1.39.orig/lib/smp_processor_id.c 2017-03-13 21:04:36.000000000 +0100
  25629. +++ linux-4.1.39/lib/smp_processor_id.c 2017-04-18 17:56:30.645398372 +0200
  25630. @@ -39,8 +39,9 @@
  25631. if (!printk_ratelimit())
  25632. goto out_enable;
  25633. - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
  25634. - what1, what2, preempt_count() - 1, current->comm, current->pid);
  25635. + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n",
  25636. + what1, what2, preempt_count() - 1, __migrate_disabled(current),
  25637. + current->comm, current->pid);
  25638. print_symbol("caller is %s\n", (long)__builtin_return_address(0));
  25639. dump_stack();
  25640. diff -Nur linux-4.1.39.orig/lib/strnlen_user.c linux-4.1.39/lib/strnlen_user.c
  25641. --- linux-4.1.39.orig/lib/strnlen_user.c 2017-03-13 21:04:36.000000000 +0100
  25642. +++ linux-4.1.39/lib/strnlen_user.c 2017-04-18 17:56:30.649398527 +0200
  25643. @@ -85,7 +85,8 @@
  25644. * @str: The string to measure.
  25645. * @count: Maximum count (including NUL character)
  25646. *
  25647. - * Context: User context only. This function may sleep.
  25648. + * Context: User context only. This function may sleep if pagefaults are
  25649. + * enabled.
  25650. *
  25651. * Get the size of a NUL-terminated string in user space.
  25652. *
  25653. @@ -121,7 +122,8 @@
  25654. * strlen_user: - Get the size of a user string INCLUDING final NUL.
  25655. * @str: The string to measure.
  25656. *
  25657. - * Context: User context only. This function may sleep.
  25658. + * Context: User context only. This function may sleep if pagefaults are
  25659. + * enabled.
  25660. *
  25661. * Get the size of a NUL-terminated string in user space.
  25662. *
  25663. diff -Nur linux-4.1.39.orig/Makefile linux-4.1.39/Makefile
  25664. --- linux-4.1.39.orig/Makefile 2017-03-13 21:04:36.000000000 +0100
  25665. +++ linux-4.1.39/Makefile 2017-04-18 17:56:30.545394493 +0200
  25666. @@ -400,7 +400,7 @@
  25667. KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
  25668. -fno-strict-aliasing -fno-common \
  25669. -Werror-implicit-function-declaration \
  25670. - -Wno-format-security \
  25671. + -Wno-format-security -fno-PIE \
  25672. -std=gnu89
  25673. KBUILD_AFLAGS_KERNEL :=
  25674. diff -Nur linux-4.1.39.orig/mm/compaction.c linux-4.1.39/mm/compaction.c
  25675. --- linux-4.1.39.orig/mm/compaction.c 2017-03-13 21:04:36.000000000 +0100
  25676. +++ linux-4.1.39/mm/compaction.c 2017-04-18 17:56:30.649398527 +0200
  25677. @@ -1423,10 +1423,12 @@
  25678. cc->migrate_pfn & ~((1UL << cc->order) - 1);
  25679. if (last_migrated_pfn < current_block_start) {
  25680. - cpu = get_cpu();
  25681. + cpu = get_cpu_light();
  25682. + local_lock_irq(swapvec_lock);
  25683. lru_add_drain_cpu(cpu);
  25684. + local_unlock_irq(swapvec_lock);
  25685. drain_local_pages(zone);
  25686. - put_cpu();
  25687. + put_cpu_light();
  25688. /* No more flushing until we migrate again */
  25689. last_migrated_pfn = 0;
  25690. }
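
get_cpu_light()/put_cpu_light(), as used in the compaction hunk above, are this series' replacement for get_cpu()/put_cpu(): on -rt they pin the task to its CPU without disabling preemption, which is what makes it legal to take swapvec_lock (a sleeping local lock defined in the mm/swap.c hunk further down) inside the section. The generic shape, with a placeholder for the per-CPU work:

static void drain_my_percpu_state(int cpu)
{
	/* placeholder for lru_add_drain_cpu()/drain_local_pages() style work;
	 * on -rt this is allowed to take sleeping locks */
}

static void my_drain_local_state(void)
{
	int cpu;

	cpu = get_cpu_light();		/* plain get_cpu() on !PREEMPT_RT_FULL */
	drain_my_percpu_state(cpu);
	put_cpu_light();
}
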
  25691. diff -Nur linux-4.1.39.orig/mm/filemap.c linux-4.1.39/mm/filemap.c
  25692. --- linux-4.1.39.orig/mm/filemap.c 2017-03-13 21:04:36.000000000 +0100
  25693. +++ linux-4.1.39/mm/filemap.c 2017-04-18 17:56:30.649398527 +0200
  25694. @@ -167,7 +167,9 @@
  25695. if (!workingset_node_pages(node) &&
  25696. list_empty(&node->private_list)) {
  25697. node->private_data = mapping;
  25698. - list_lru_add(&workingset_shadow_nodes, &node->private_list);
  25699. + local_lock(workingset_shadow_lock);
  25700. + list_lru_add(&__workingset_shadow_nodes, &node->private_list);
  25701. + local_unlock(workingset_shadow_lock);
  25702. }
  25703. }
  25704. @@ -533,9 +535,12 @@
  25705. * node->private_list is protected by
  25706. * mapping->tree_lock.
  25707. */
  25708. - if (!list_empty(&node->private_list))
  25709. - list_lru_del(&workingset_shadow_nodes,
  25710. + if (!list_empty(&node->private_list)) {
  25711. + local_lock(workingset_shadow_lock);
  25712. + list_lru_del(&__workingset_shadow_nodes,
  25713. &node->private_list);
  25714. + local_unlock(workingset_shadow_lock);
  25715. + }
  25716. }
  25717. return 0;
  25718. }
  25719. diff -Nur linux-4.1.39.orig/mm/highmem.c linux-4.1.39/mm/highmem.c
  25720. --- linux-4.1.39.orig/mm/highmem.c 2017-03-13 21:04:36.000000000 +0100
  25721. +++ linux-4.1.39/mm/highmem.c 2017-04-18 17:56:30.649398527 +0200
  25722. @@ -29,10 +29,11 @@
  25723. #include <linux/kgdb.h>
  25724. #include <asm/tlbflush.h>
  25725. -
  25726. +#ifndef CONFIG_PREEMPT_RT_FULL
  25727. #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
  25728. DEFINE_PER_CPU(int, __kmap_atomic_idx);
  25729. #endif
  25730. +#endif
  25731. /*
  25732. * Virtual_count is not a pure "count".
  25733. @@ -107,8 +108,9 @@
  25734. unsigned long totalhigh_pages __read_mostly;
  25735. EXPORT_SYMBOL(totalhigh_pages);
  25736. -
  25737. +#ifndef CONFIG_PREEMPT_RT_FULL
  25738. EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);
  25739. +#endif
  25740. unsigned int nr_free_highpages (void)
  25741. {
  25742. diff -Nur linux-4.1.39.orig/mm/Kconfig linux-4.1.39/mm/Kconfig
  25743. --- linux-4.1.39.orig/mm/Kconfig 2017-03-13 21:04:36.000000000 +0100
  25744. +++ linux-4.1.39/mm/Kconfig 2017-04-18 17:56:30.649398527 +0200
  25745. @@ -409,7 +409,7 @@
  25746. config TRANSPARENT_HUGEPAGE
  25747. bool "Transparent Hugepage Support"
  25748. - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
  25749. + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL
  25750. select COMPACTION
  25751. help
  25752. Transparent Hugepages allows the kernel to use huge pages and
  25753. diff -Nur linux-4.1.39.orig/mm/memcontrol.c linux-4.1.39/mm/memcontrol.c
  25754. --- linux-4.1.39.orig/mm/memcontrol.c 2017-03-13 21:04:36.000000000 +0100
  25755. +++ linux-4.1.39/mm/memcontrol.c 2017-04-18 17:56:30.649398527 +0200
  25756. @@ -66,6 +66,8 @@
  25757. #include <net/sock.h>
  25758. #include <net/ip.h>
  25759. #include <net/tcp_memcontrol.h>
  25760. +#include <linux/locallock.h>
  25761. +
  25762. #include "slab.h"
  25763. #include <asm/uaccess.h>
  25764. @@ -85,6 +87,7 @@
  25765. #define do_swap_account 0
  25766. #endif
  25767. +static DEFINE_LOCAL_IRQ_LOCK(event_lock);
  25768. static const char * const mem_cgroup_stat_names[] = {
  25769. "cache",
  25770. "rss",
  25771. @@ -2124,14 +2127,17 @@
  25772. */
  25773. static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
  25774. {
  25775. - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
  25776. + struct memcg_stock_pcp *stock;
  25777. + int cpu = get_cpu_light();
  25778. +
  25779. + stock = &per_cpu(memcg_stock, cpu);
  25780. if (stock->cached != memcg) { /* reset if necessary */
  25781. drain_stock(stock);
  25782. stock->cached = memcg;
  25783. }
  25784. stock->nr_pages += nr_pages;
  25785. - put_cpu_var(memcg_stock);
  25786. + put_cpu_light();
  25787. }
  25788. /*
  25789. @@ -2147,7 +2153,7 @@
  25790. return;
  25791. /* Notify other cpus that system-wide "drain" is running */
  25792. get_online_cpus();
  25793. - curcpu = get_cpu();
  25794. + curcpu = get_cpu_light();
  25795. for_each_online_cpu(cpu) {
  25796. struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
  25797. struct mem_cgroup *memcg;
  25798. @@ -2164,7 +2170,7 @@
  25799. schedule_work_on(cpu, &stock->work);
  25800. }
  25801. }
  25802. - put_cpu();
  25803. + put_cpu_light();
  25804. put_online_cpus();
  25805. mutex_unlock(&percpu_charge_mutex);
  25806. }
  25807. @@ -4803,12 +4809,12 @@
  25808. ret = 0;
  25809. - local_irq_disable();
  25810. + local_lock_irq(event_lock);
  25811. mem_cgroup_charge_statistics(to, page, nr_pages);
  25812. memcg_check_events(to, page);
  25813. mem_cgroup_charge_statistics(from, page, -nr_pages);
  25814. memcg_check_events(from, page);
  25815. - local_irq_enable();
  25816. + local_unlock_irq(event_lock);
  25817. out_unlock:
  25818. unlock_page(page);
  25819. out:
  25820. @@ -5551,10 +5557,10 @@
  25821. VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  25822. }
  25823. - local_irq_disable();
  25824. + local_lock_irq(event_lock);
  25825. mem_cgroup_charge_statistics(memcg, page, nr_pages);
  25826. memcg_check_events(memcg, page);
  25827. - local_irq_enable();
  25828. + local_unlock_irq(event_lock);
  25829. if (do_swap_account && PageSwapCache(page)) {
  25830. swp_entry_t entry = { .val = page_private(page) };
  25831. @@ -5610,14 +5616,14 @@
  25832. memcg_oom_recover(memcg);
  25833. }
  25834. - local_irq_save(flags);
  25835. + local_lock_irqsave(event_lock, flags);
  25836. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
  25837. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
  25838. __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
  25839. __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
  25840. __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  25841. memcg_check_events(memcg, dummy_page);
  25842. - local_irq_restore(flags);
  25843. + local_unlock_irqrestore(event_lock, flags);
  25844. if (!mem_cgroup_is_root(memcg))
  25845. css_put_many(&memcg->css, nr_pages);
  25846. @@ -5821,6 +5827,7 @@
  25847. {
  25848. struct mem_cgroup *memcg;
  25849. unsigned short oldid;
  25850. + unsigned long flags;
  25851. VM_BUG_ON_PAGE(PageLRU(page), page);
  25852. VM_BUG_ON_PAGE(page_count(page), page);
  25853. @@ -5843,9 +5850,11 @@
  25854. if (!mem_cgroup_is_root(memcg))
  25855. page_counter_uncharge(&memcg->memory, 1);
  25856. + local_lock_irqsave(event_lock, flags);
  25857. /* Caller disabled preemption with mapping->tree_lock */
  25858. mem_cgroup_charge_statistics(memcg, page, -1);
  25859. memcg_check_events(memcg, page);
  25860. + local_unlock_irqrestore(event_lock, flags);
  25861. }
  25862. /**
  25863. diff -Nur linux-4.1.39.orig/mm/memory.c linux-4.1.39/mm/memory.c
  25864. --- linux-4.1.39.orig/mm/memory.c 2017-03-13 21:04:36.000000000 +0100
  25865. +++ linux-4.1.39/mm/memory.c 2017-04-18 17:56:30.649398527 +0200
  25866. @@ -3753,7 +3753,7 @@
  25867. }
  25868. #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
  25869. -void might_fault(void)
  25870. +void __might_fault(const char *file, int line)
  25871. {
  25872. /*
  25873. * Some code (nfs/sunrpc) uses socket ops on kernel memory while
  25874. @@ -3763,21 +3763,15 @@
  25875. */
  25876. if (segment_eq(get_fs(), KERNEL_DS))
  25877. return;
  25878. -
  25879. - /*
  25880. - * it would be nicer only to annotate paths which are not under
  25881. - * pagefault_disable, however that requires a larger audit and
  25882. - * providing helpers like get_user_atomic.
  25883. - */
  25884. - if (in_atomic())
  25885. + if (pagefault_disabled())
  25886. return;
  25887. -
  25888. - __might_sleep(__FILE__, __LINE__, 0);
  25889. -
  25890. + __might_sleep(file, line, 0);
  25891. +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
  25892. if (current->mm)
  25893. might_lock_read(&current->mm->mmap_sem);
  25894. +#endif
  25895. }
  25896. -EXPORT_SYMBOL(might_fault);
  25897. +EXPORT_SYMBOL(__might_fault);
  25898. #endif
  25899. #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
  25900. diff -Nur linux-4.1.39.orig/mm/mmu_context.c linux-4.1.39/mm/mmu_context.c
  25901. --- linux-4.1.39.orig/mm/mmu_context.c 2017-03-13 21:04:36.000000000 +0100
  25902. +++ linux-4.1.39/mm/mmu_context.c 2017-04-18 17:56:30.649398527 +0200
  25903. @@ -23,6 +23,7 @@
  25904. struct task_struct *tsk = current;
  25905. task_lock(tsk);
  25906. + preempt_disable_rt();
  25907. active_mm = tsk->active_mm;
  25908. if (active_mm != mm) {
  25909. atomic_inc(&mm->mm_count);
  25910. @@ -30,6 +31,7 @@
  25911. }
  25912. tsk->mm = mm;
  25913. switch_mm(active_mm, mm, tsk);
  25914. + preempt_enable_rt();
  25915. task_unlock(tsk);
  25916. #ifdef finish_arch_post_lock_switch
  25917. finish_arch_post_lock_switch();
  25918. diff -Nur linux-4.1.39.orig/mm/page_alloc.c linux-4.1.39/mm/page_alloc.c
  25919. --- linux-4.1.39.orig/mm/page_alloc.c 2017-03-13 21:04:36.000000000 +0100
  25920. +++ linux-4.1.39/mm/page_alloc.c 2017-04-18 17:56:30.649398527 +0200
  25921. @@ -60,6 +60,7 @@
  25922. #include <linux/page_ext.h>
  25923. #include <linux/hugetlb.h>
  25924. #include <linux/sched/rt.h>
  25925. +#include <linux/locallock.h>
  25926. #include <linux/page_owner.h>
  25927. #include <asm/sections.h>
  25928. @@ -233,6 +234,18 @@
  25929. EXPORT_SYMBOL(nr_online_nodes);
  25930. #endif
  25931. +static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
  25932. +
  25933. +#ifdef CONFIG_PREEMPT_RT_BASE
  25934. +# define cpu_lock_irqsave(cpu, flags) \
  25935. + local_lock_irqsave_on(pa_lock, flags, cpu)
  25936. +# define cpu_unlock_irqrestore(cpu, flags) \
  25937. + local_unlock_irqrestore_on(pa_lock, flags, cpu)
  25938. +#else
  25939. +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
  25940. +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
  25941. +#endif
  25942. +
  25943. int page_group_by_mobility_disabled __read_mostly;
  25944. void set_pageblock_migratetype(struct page *page, int migratetype)
  25945. @@ -701,7 +714,7 @@
  25946. }
  25947. /*
  25948. - * Frees a number of pages from the PCP lists
  25949. + * Frees a number of pages which have been collected from the pcp lists.
  25950. * Assumes all pages on list are in same zone, and of same order.
  25951. * count is the number of pages to free.
  25952. *
  25953. @@ -712,18 +725,51 @@
  25954. * pinned" detection logic.
  25955. */
  25956. static void free_pcppages_bulk(struct zone *zone, int count,
  25957. - struct per_cpu_pages *pcp)
  25958. + struct list_head *list)
  25959. {
  25960. - int migratetype = 0;
  25961. - int batch_free = 0;
  25962. int to_free = count;
  25963. unsigned long nr_scanned;
  25964. + unsigned long flags;
  25965. +
  25966. + spin_lock_irqsave(&zone->lock, flags);
  25967. - spin_lock(&zone->lock);
  25968. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  25969. if (nr_scanned)
  25970. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  25971. + while (!list_empty(list)) {
  25972. + struct page *page = list_first_entry(list, struct page, lru);
  25973. + int mt; /* migratetype of the to-be-freed page */
  25974. +
  25975. + /* must delete as __free_one_page list manipulates */
  25976. + list_del(&page->lru);
  25977. +
  25978. + mt = get_freepage_migratetype(page);
  25979. + if (unlikely(has_isolate_pageblock(zone)))
  25980. + mt = get_pageblock_migratetype(page);
  25981. +
  25982. + /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
  25983. + __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  25984. + trace_mm_page_pcpu_drain(page, 0, mt);
  25985. + to_free--;
  25986. + }
  25987. + WARN_ON(to_free != 0);
  25988. + spin_unlock_irqrestore(&zone->lock, flags);
  25989. +}
  25990. +
  25991. +/*
  25992. + * Moves a number of pages from the PCP lists to free list which
  25993. + * is freed outside of the locked region.
  25994. + *
  25995. + * Assumes all pages on list are in same zone, and of same order.
  25996. + * count is the number of pages to free.
  25997. + */
  25998. +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
  25999. + struct list_head *dst)
  26000. +{
  26001. + int migratetype = 0;
  26002. + int batch_free = 0;
  26003. +
  26004. while (to_free) {
  26005. struct page *page;
  26006. struct list_head *list;
  26007. @@ -739,7 +785,7 @@
  26008. batch_free++;
  26009. if (++migratetype == MIGRATE_PCPTYPES)
  26010. migratetype = 0;
  26011. - list = &pcp->lists[migratetype];
  26012. + list = &src->lists[migratetype];
  26013. } while (list_empty(list));
  26014. /* This is the only non-empty list. Free them all. */
  26015. @@ -747,21 +793,11 @@
  26016. batch_free = to_free;
  26017. do {
  26018. - int mt; /* migratetype of the to-be-freed page */
  26019. -
  26020. - page = list_entry(list->prev, struct page, lru);
  26021. - /* must delete as __free_one_page list manipulates */
  26022. + page = list_last_entry(list, struct page, lru);
  26023. list_del(&page->lru);
  26024. - mt = get_freepage_migratetype(page);
  26025. - if (unlikely(has_isolate_pageblock(zone)))
  26026. - mt = get_pageblock_migratetype(page);
  26027. -
  26028. - /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
  26029. - __free_one_page(page, page_to_pfn(page), zone, 0, mt);
  26030. - trace_mm_page_pcpu_drain(page, 0, mt);
  26031. + list_add(&page->lru, dst);
  26032. } while (--to_free && --batch_free && !list_empty(list));
  26033. }
  26034. - spin_unlock(&zone->lock);
  26035. }
  26036. static void free_one_page(struct zone *zone,
  26037. @@ -770,7 +806,9 @@
  26038. int migratetype)
  26039. {
  26040. unsigned long nr_scanned;
  26041. - spin_lock(&zone->lock);
  26042. + unsigned long flags;
  26043. +
  26044. + spin_lock_irqsave(&zone->lock, flags);
  26045. nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
  26046. if (nr_scanned)
  26047. __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
  26048. @@ -780,7 +818,7 @@
  26049. migratetype = get_pfnblock_migratetype(page, pfn);
  26050. }
  26051. __free_one_page(page, pfn, zone, order, migratetype);
  26052. - spin_unlock(&zone->lock);
  26053. + spin_unlock_irqrestore(&zone->lock, flags);
  26054. }
  26055. static int free_tail_pages_check(struct page *head_page, struct page *page)
  26056. @@ -845,11 +883,11 @@
  26057. return;
  26058. migratetype = get_pfnblock_migratetype(page, pfn);
  26059. - local_irq_save(flags);
  26060. + local_lock_irqsave(pa_lock, flags);
  26061. __count_vm_events(PGFREE, 1 << order);
  26062. set_freepage_migratetype(page, migratetype);
  26063. free_one_page(page_zone(page), page, pfn, order, migratetype);
  26064. - local_irq_restore(flags);
  26065. + local_unlock_irqrestore(pa_lock, flags);
  26066. }
  26067. void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
  26068. @@ -1396,16 +1434,18 @@
  26069. void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  26070. {
  26071. unsigned long flags;
  26072. + LIST_HEAD(dst);
  26073. int to_drain, batch;
  26074. - local_irq_save(flags);
  26075. + local_lock_irqsave(pa_lock, flags);
  26076. batch = READ_ONCE(pcp->batch);
  26077. to_drain = min(pcp->count, batch);
  26078. if (to_drain > 0) {
  26079. - free_pcppages_bulk(zone, to_drain, pcp);
  26080. + isolate_pcp_pages(to_drain, pcp, &dst);
  26081. pcp->count -= to_drain;
  26082. }
  26083. - local_irq_restore(flags);
  26084. + local_unlock_irqrestore(pa_lock, flags);
  26085. + free_pcppages_bulk(zone, to_drain, &dst);
  26086. }
  26087. #endif
  26088. @@ -1421,16 +1461,21 @@
  26089. unsigned long flags;
  26090. struct per_cpu_pageset *pset;
  26091. struct per_cpu_pages *pcp;
  26092. + LIST_HEAD(dst);
  26093. + int count;
  26094. - local_irq_save(flags);
  26095. + cpu_lock_irqsave(cpu, flags);
  26096. pset = per_cpu_ptr(zone->pageset, cpu);
  26097. pcp = &pset->pcp;
  26098. - if (pcp->count) {
  26099. - free_pcppages_bulk(zone, pcp->count, pcp);
  26100. + count = pcp->count;
  26101. + if (count) {
  26102. + isolate_pcp_pages(count, pcp, &dst);
  26103. pcp->count = 0;
  26104. }
  26105. - local_irq_restore(flags);
  26106. + cpu_unlock_irqrestore(cpu, flags);
  26107. + if (count)
  26108. + free_pcppages_bulk(zone, count, &dst);
  26109. }
  26110. /*
  26111. @@ -1516,8 +1561,17 @@
  26112. else
  26113. cpumask_clear_cpu(cpu, &cpus_with_pcps);
  26114. }
  26115. +#ifndef CONFIG_PREEMPT_RT_BASE
  26116. on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
  26117. zone, 1);
  26118. +#else
  26119. + for_each_cpu(cpu, &cpus_with_pcps) {
  26120. + if (zone)
  26121. + drain_pages_zone(cpu, zone);
  26122. + else
  26123. + drain_pages(cpu);
  26124. + }
  26125. +#endif
  26126. }
  26127. #ifdef CONFIG_HIBERNATION
  26128. @@ -1573,7 +1627,7 @@
  26129. migratetype = get_pfnblock_migratetype(page, pfn);
  26130. set_freepage_migratetype(page, migratetype);
  26131. - local_irq_save(flags);
  26132. + local_lock_irqsave(pa_lock, flags);
  26133. __count_vm_event(PGFREE);
  26134. /*
  26135. @@ -1599,12 +1653,17 @@
  26136. pcp->count++;
  26137. if (pcp->count >= pcp->high) {
  26138. unsigned long batch = READ_ONCE(pcp->batch);
  26139. - free_pcppages_bulk(zone, batch, pcp);
  26140. + LIST_HEAD(dst);
  26141. +
  26142. + isolate_pcp_pages(batch, pcp, &dst);
  26143. pcp->count -= batch;
  26144. + local_unlock_irqrestore(pa_lock, flags);
  26145. + free_pcppages_bulk(zone, batch, &dst);
  26146. + return;
  26147. }
  26148. out:
  26149. - local_irq_restore(flags);
  26150. + local_unlock_irqrestore(pa_lock, flags);
  26151. }
  26152. /*
  26153. @@ -1735,7 +1794,7 @@
  26154. struct per_cpu_pages *pcp;
  26155. struct list_head *list;
  26156. - local_irq_save(flags);
  26157. + local_lock_irqsave(pa_lock, flags);
  26158. pcp = &this_cpu_ptr(zone->pageset)->pcp;
  26159. list = &pcp->lists[migratetype];
  26160. if (list_empty(list)) {
  26161. @@ -1767,13 +1826,15 @@
  26162. */
  26163. WARN_ON_ONCE(order > 1);
  26164. }
  26165. - spin_lock_irqsave(&zone->lock, flags);
  26166. + local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
  26167. page = __rmqueue(zone, order, migratetype);
  26168. - spin_unlock(&zone->lock);
  26169. - if (!page)
  26170. + if (!page) {
  26171. + spin_unlock(&zone->lock);
  26172. goto failed;
  26173. + }
  26174. __mod_zone_freepage_state(zone, -(1 << order),
  26175. get_freepage_migratetype(page));
  26176. + spin_unlock(&zone->lock);
  26177. }
  26178. __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
  26179. @@ -1783,13 +1844,13 @@
  26180. __count_zone_vm_events(PGALLOC, zone, 1 << order);
  26181. zone_statistics(preferred_zone, zone, gfp_flags);
  26182. - local_irq_restore(flags);
  26183. + local_unlock_irqrestore(pa_lock, flags);
  26184. VM_BUG_ON_PAGE(bad_range(zone, page), page);
  26185. return page;
  26186. failed:
  26187. - local_irq_restore(flags);
  26188. + local_unlock_irqrestore(pa_lock, flags);
  26189. return NULL;
  26190. }
  26191. @@ -5680,6 +5741,7 @@
  26192. void __init page_alloc_init(void)
  26193. {
  26194. hotcpu_notifier(page_alloc_cpu_notify, 0);
  26195. + local_irq_lock_init(pa_lock);
  26196. }
  26197. /*
  26198. @@ -6575,7 +6637,7 @@
  26199. struct per_cpu_pageset *pset;
  26200. /* avoid races with drain_pages() */
  26201. - local_irq_save(flags);
  26202. + local_lock_irqsave(pa_lock, flags);
  26203. if (zone->pageset != &boot_pageset) {
  26204. for_each_online_cpu(cpu) {
  26205. pset = per_cpu_ptr(zone->pageset, cpu);
  26206. @@ -6584,7 +6646,7 @@
  26207. free_percpu(zone->pageset);
  26208. zone->pageset = &boot_pageset;
  26209. }
  26210. - local_irq_restore(flags);
  26211. + local_unlock_irqrestore(pa_lock, flags);
  26212. }
  26213. #ifdef CONFIG_MEMORY_HOTREMOVE
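
The page-allocator rework above follows a pattern worth spelling out: instead of handing pages back to the buddy allocator while the per-CPU pa_lock section is active, isolate_pcp_pages() only detaches them onto a private list, the per-CPU lock is dropped, and free_pcppages_bulk() then takes zone->lock on its own to do the expensive part. A generic sketch of that split, with invented types (the item type, cache and free routine are not from the patch):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_item {
	struct list_head lru;
};

struct my_cache {
	spinlock_t	lock;		/* hot, fine-grained lock */
	struct list_head items;
};

static void my_isolate(struct my_cache *c, struct list_head *dst)
{
	spin_lock(&c->lock);
	list_splice_init(&c->items, dst);	/* detach only, no heavy work here */
	spin_unlock(&c->lock);
}

static void my_drain(struct my_cache *c)
{
	struct my_item *it, *tmp;
	LIST_HEAD(batch);

	my_isolate(c, &batch);			/* short critical section */

	list_for_each_entry_safe(it, tmp, &batch, lru) {	/* lock already dropped */
		list_del(&it->lru);
		kfree(it);
	}
}

Keeping the actual freeing outside the pa_lock section is what keeps the per-CPU critical sections short on -rt.
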
  26214. diff -Nur linux-4.1.39.orig/mm/percpu.c linux-4.1.39/mm/percpu.c
  26215. --- linux-4.1.39.orig/mm/percpu.c 2017-03-13 21:04:36.000000000 +0100
  26216. +++ linux-4.1.39/mm/percpu.c 2017-04-18 17:56:30.649398527 +0200
  26217. @@ -1282,18 +1282,7 @@
  26218. }
  26219. EXPORT_SYMBOL_GPL(free_percpu);
  26220. -/**
  26221. - * is_kernel_percpu_address - test whether address is from static percpu area
  26222. - * @addr: address to test
  26223. - *
  26224. - * Test whether @addr belongs to in-kernel static percpu area. Module
  26225. - * static percpu areas are not considered. For those, use
  26226. - * is_module_percpu_address().
  26227. - *
  26228. - * RETURNS:
  26229. - * %true if @addr is from in-kernel static percpu area, %false otherwise.
  26230. - */
  26231. -bool is_kernel_percpu_address(unsigned long addr)
  26232. +bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
  26233. {
  26234. #ifdef CONFIG_SMP
  26235. const size_t static_size = __per_cpu_end - __per_cpu_start;
  26236. @@ -1302,16 +1291,36 @@
  26237. for_each_possible_cpu(cpu) {
  26238. void *start = per_cpu_ptr(base, cpu);
  26239. + void *va = (void *)addr;
  26240. - if ((void *)addr >= start && (void *)addr < start + static_size)
  26241. + if (va >= start && va < start + static_size) {
  26242. + if (can_addr)
  26243. + *can_addr = (unsigned long) (va - start);
  26244. return true;
  26245. - }
  26246. + }
  26247. + }
  26248. #endif
  26249. /* on UP, can't distinguish from other static vars, always false */
  26250. return false;
  26251. }
  26252. /**
  26253. + * is_kernel_percpu_address - test whether address is from static percpu area
  26254. + * @addr: address to test
  26255. + *
  26256. + * Test whether @addr belongs to in-kernel static percpu area. Module
  26257. + * static percpu areas are not considered. For those, use
  26258. + * is_module_percpu_address().
  26259. + *
  26260. + * RETURNS:
  26261. + * %true if @addr is from in-kernel static percpu area, %false otherwise.
  26262. + */
  26263. +bool is_kernel_percpu_address(unsigned long addr)
  26264. +{
  26265. + return __is_kernel_percpu_address(addr, NULL);
  26266. +}
  26267. +
  26268. +/**
  26269. * per_cpu_ptr_to_phys - convert translated percpu address to physical address
  26270. * @addr: the address to be converted to physical address
  26271. *
  26272. diff -Nur linux-4.1.39.orig/mm/slab.h linux-4.1.39/mm/slab.h
  26273. --- linux-4.1.39.orig/mm/slab.h 2017-03-13 21:04:36.000000000 +0100
  26274. +++ linux-4.1.39/mm/slab.h 2017-04-18 17:56:30.649398527 +0200
  26275. @@ -330,7 +330,11 @@
  26276. * The slab lists for all objects.
  26277. */
  26278. struct kmem_cache_node {
  26279. +#ifdef CONFIG_SLUB
  26280. + raw_spinlock_t list_lock;
  26281. +#else
  26282. spinlock_t list_lock;
  26283. +#endif
  26284. #ifdef CONFIG_SLAB
  26285. struct list_head slabs_partial; /* partial list first, better asm code */
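
On PREEMPT_RT_FULL an ordinary spinlock_t becomes a sleeping, rt_mutex-based lock, while raw_spinlock_t keeps the classic spin-with-interrupts-off semantics. Making SLUB's list_lock a raw_spinlock_t here (with the matching raw_spin_* accessors in the mm/slub.c hunks below) declares that this lock must remain usable from truly atomic context. A minimal illustration of the two flavours, with invented lock names:

static DEFINE_SPINLOCK(my_sleeping_lock);	/* rt_mutex-backed on -rt */
static DEFINE_RAW_SPINLOCK(my_atomic_lock);	/* always a real spinlock */

static void my_update(void)
{
	unsigned long flags;

	spin_lock(&my_sleeping_lock);		/* may sleep on -rt; process context only */
	/* ... preemptible critical section ... */
	spin_unlock(&my_sleeping_lock);

	raw_spin_lock_irqsave(&my_atomic_lock, flags);	/* never sleeps, irqs really off */
	/* ... short, truly atomic critical section ... */
	raw_spin_unlock_irqrestore(&my_atomic_lock, flags);
}
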
  26286. diff -Nur linux-4.1.39.orig/mm/slub.c linux-4.1.39/mm/slub.c
  26287. --- linux-4.1.39.orig/mm/slub.c 2017-03-13 21:04:36.000000000 +0100
  26288. +++ linux-4.1.39/mm/slub.c 2017-04-18 17:56:30.653398681 +0200
  26289. @@ -1069,7 +1069,7 @@
  26290. {
  26291. struct kmem_cache_node *n = get_node(s, page_to_nid(page));
  26292. - spin_lock_irqsave(&n->list_lock, *flags);
  26293. + raw_spin_lock_irqsave(&n->list_lock, *flags);
  26294. slab_lock(page);
  26295. if (!check_slab(s, page))
  26296. @@ -1116,7 +1116,7 @@
  26297. fail:
  26298. slab_unlock(page);
  26299. - spin_unlock_irqrestore(&n->list_lock, *flags);
  26300. + raw_spin_unlock_irqrestore(&n->list_lock, *flags);
  26301. slab_fix(s, "Object at 0x%p not freed", object);
  26302. return NULL;
  26303. }
  26304. @@ -1242,6 +1242,12 @@
  26305. #endif /* CONFIG_SLUB_DEBUG */
  26306. +struct slub_free_list {
  26307. + raw_spinlock_t lock;
  26308. + struct list_head list;
  26309. +};
  26310. +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
  26311. +
  26312. /*
  26313. * Hooks for other subsystems that check memory allocations. In a typical
  26314. * production configuration these hooks all should produce no code at all.
  26315. @@ -1306,6 +1312,17 @@
  26316. kasan_slab_free(s, x);
  26317. }
  26318. +static void setup_object(struct kmem_cache *s, struct page *page,
  26319. + void *object)
  26320. +{
  26321. + setup_object_debug(s, page, object);
  26322. + if (unlikely(s->ctor)) {
  26323. + kasan_unpoison_object_data(s, object);
  26324. + s->ctor(object);
  26325. + kasan_poison_object_data(s, object);
  26326. + }
  26327. +}
  26328. +
  26329. /*
  26330. * Slab allocation and freeing
  26331. */
  26332. @@ -1336,10 +1353,17 @@
  26333. struct page *page;
  26334. struct kmem_cache_order_objects oo = s->oo;
  26335. gfp_t alloc_gfp;
  26336. + void *start, *p;
  26337. + int idx, order;
  26338. + bool enableirqs;
  26339. flags &= gfp_allowed_mask;
  26340. - if (flags & __GFP_WAIT)
  26341. + enableirqs = (flags & __GFP_WAIT) != 0;
  26342. +#ifdef CONFIG_PREEMPT_RT_FULL
  26343. + enableirqs |= system_state == SYSTEM_RUNNING;
  26344. +#endif
  26345. + if (enableirqs)
  26346. local_irq_enable();
  26347. flags |= s->allocflags;
  26348. @@ -1359,13 +1383,13 @@
  26349. * Try a lower order alloc if possible
  26350. */
  26351. page = alloc_slab_page(s, alloc_gfp, node, oo);
  26352. -
  26353. - if (page)
  26354. - stat(s, ORDER_FALLBACK);
  26355. + if (unlikely(!page))
  26356. + goto out;
  26357. + stat(s, ORDER_FALLBACK);
  26358. }
  26359. - if (kmemcheck_enabled && page
  26360. - && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
  26361. + if (kmemcheck_enabled &&
  26362. + !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
  26363. int pages = 1 << oo_order(oo);
  26364. kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node);
  26365. @@ -1380,51 +1404,9 @@
  26366. kmemcheck_mark_unallocated_pages(page, pages);
  26367. }
  26368. - if (flags & __GFP_WAIT)
  26369. - local_irq_disable();
  26370. - if (!page)
  26371. - return NULL;
  26372. -
  26373. page->objects = oo_objects(oo);
  26374. - mod_zone_page_state(page_zone(page),
  26375. - (s->flags & SLAB_RECLAIM_ACCOUNT) ?
  26376. - NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
  26377. - 1 << oo_order(oo));
  26378. -
  26379. - return page;
  26380. -}
  26381. -
  26382. -static void setup_object(struct kmem_cache *s, struct page *page,
  26383. - void *object)
  26384. -{
  26385. - setup_object_debug(s, page, object);
  26386. - if (unlikely(s->ctor)) {
  26387. - kasan_unpoison_object_data(s, object);
  26388. - s->ctor(object);
  26389. - kasan_poison_object_data(s, object);
  26390. - }
  26391. -}
  26392. -
  26393. -static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
  26394. -{
  26395. - struct page *page;
  26396. - void *start;
  26397. - void *p;
  26398. - int order;
  26399. - int idx;
  26400. -
  26401. - if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
  26402. - pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
  26403. - BUG();
  26404. - }
  26405. -
  26406. - page = allocate_slab(s,
  26407. - flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
  26408. - if (!page)
  26409. - goto out;
  26410. order = compound_order(page);
  26411. - inc_slabs_node(s, page_to_nid(page), page->objects);
  26412. page->slab_cache = s;
  26413. __SetPageSlab(page);
  26414. if (page_is_pfmemalloc(page))
  26415. @@ -1448,10 +1430,34 @@
  26416. page->freelist = start;
  26417. page->inuse = page->objects;
  26418. page->frozen = 1;
  26419. +
  26420. out:
  26421. + if (enableirqs)
  26422. + local_irq_disable();
  26423. + if (!page)
  26424. + return NULL;
  26425. +
  26426. + mod_zone_page_state(page_zone(page),
  26427. + (s->flags & SLAB_RECLAIM_ACCOUNT) ?
  26428. + NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
  26429. + 1 << oo_order(oo));
  26430. +
  26431. + inc_slabs_node(s, page_to_nid(page), page->objects);
  26432. +
  26433. return page;
  26434. }
  26435. +static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
  26436. +{
  26437. + if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
  26438. + pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
  26439. + BUG();
  26440. + }
  26441. +
  26442. + return allocate_slab(s,
  26443. + flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
  26444. +}
  26445. +
  26446. static void __free_slab(struct kmem_cache *s, struct page *page)
  26447. {
  26448. int order = compound_order(page);
  26449. @@ -1483,6 +1489,16 @@
  26450. memcg_uncharge_slab(s, order);
  26451. }
  26452. +static void free_delayed(struct list_head *h)
  26453. +{
  26454. + while(!list_empty(h)) {
  26455. + struct page *page = list_first_entry(h, struct page, lru);
  26456. +
  26457. + list_del(&page->lru);
  26458. + __free_slab(page->slab_cache, page);
  26459. + }
  26460. +}
  26461. +
  26462. #define need_reserve_slab_rcu \
  26463. (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
  26464. @@ -1517,6 +1533,12 @@
  26465. }
  26466. call_rcu(head, rcu_free_slab);
  26467. + } else if (irqs_disabled()) {
  26468. + struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
  26469. +
  26470. + raw_spin_lock(&f->lock);
  26471. + list_add(&page->lru, &f->list);
  26472. + raw_spin_unlock(&f->lock);
  26473. } else
  26474. __free_slab(s, page);
  26475. }
  26476. @@ -1630,7 +1652,7 @@
  26477. if (!n || !n->nr_partial)
  26478. return NULL;
  26479. - spin_lock(&n->list_lock);
  26480. + raw_spin_lock(&n->list_lock);
  26481. list_for_each_entry_safe(page, page2, &n->partial, lru) {
  26482. void *t;
  26483. @@ -1655,7 +1677,7 @@
  26484. break;
  26485. }
  26486. - spin_unlock(&n->list_lock);
  26487. + raw_spin_unlock(&n->list_lock);
  26488. return object;
  26489. }
  26490. @@ -1901,7 +1923,7 @@
  26491. * that acquire_slab() will see a slab page that
  26492. * is frozen
  26493. */
  26494. - spin_lock(&n->list_lock);
  26495. + raw_spin_lock(&n->list_lock);
  26496. }
  26497. } else {
  26498. m = M_FULL;
  26499. @@ -1912,7 +1934,7 @@
  26500. * slabs from diagnostic functions will not see
  26501. * any frozen slabs.
  26502. */
  26503. - spin_lock(&n->list_lock);
  26504. + raw_spin_lock(&n->list_lock);
  26505. }
  26506. }
  26507. @@ -1947,7 +1969,7 @@
  26508. goto redo;
  26509. if (lock)
  26510. - spin_unlock(&n->list_lock);
  26511. + raw_spin_unlock(&n->list_lock);
  26512. if (m == M_FREE) {
  26513. stat(s, DEACTIVATE_EMPTY);
  26514. @@ -1979,10 +2001,10 @@
  26515. n2 = get_node(s, page_to_nid(page));
  26516. if (n != n2) {
  26517. if (n)
  26518. - spin_unlock(&n->list_lock);
  26519. + raw_spin_unlock(&n->list_lock);
  26520. n = n2;
  26521. - spin_lock(&n->list_lock);
  26522. + raw_spin_lock(&n->list_lock);
  26523. }
  26524. do {
  26525. @@ -2011,7 +2033,7 @@
  26526. }
  26527. if (n)
  26528. - spin_unlock(&n->list_lock);
  26529. + raw_spin_unlock(&n->list_lock);
  26530. while (discard_page) {
  26531. page = discard_page;
  26532. @@ -2050,14 +2072,21 @@
  26533. pobjects = oldpage->pobjects;
  26534. pages = oldpage->pages;
  26535. if (drain && pobjects > s->cpu_partial) {
  26536. + struct slub_free_list *f;
  26537. unsigned long flags;
  26538. + LIST_HEAD(tofree);
  26539. /*
  26540. * partial array is full. Move the existing
  26541. * set to the per node partial list.
  26542. */
  26543. local_irq_save(flags);
  26544. unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
  26545. + f = this_cpu_ptr(&slub_free_list);
  26546. + raw_spin_lock(&f->lock);
  26547. + list_splice_init(&f->list, &tofree);
  26548. + raw_spin_unlock(&f->lock);
  26549. local_irq_restore(flags);
  26550. + free_delayed(&tofree);
  26551. oldpage = NULL;
  26552. pobjects = 0;
  26553. pages = 0;
  26554. @@ -2129,7 +2158,22 @@
  26555. static void flush_all(struct kmem_cache *s)
  26556. {
  26557. + LIST_HEAD(tofree);
  26558. + int cpu;
  26559. +
  26560. on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
  26561. + for_each_online_cpu(cpu) {
  26562. + struct slub_free_list *f;
  26563. +
  26564. + if (!has_cpu_slab(cpu, s))
  26565. + continue;
  26566. +
  26567. + f = &per_cpu(slub_free_list, cpu);
  26568. + raw_spin_lock_irq(&f->lock);
  26569. + list_splice_init(&f->list, &tofree);
  26570. + raw_spin_unlock_irq(&f->lock);
  26571. + free_delayed(&tofree);
  26572. + }
  26573. }
  26574. /*
  26575. @@ -2165,10 +2209,10 @@
  26576. unsigned long x = 0;
  26577. struct page *page;
  26578. - spin_lock_irqsave(&n->list_lock, flags);
  26579. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26580. list_for_each_entry(page, &n->partial, lru)
  26581. x += get_count(page);
  26582. - spin_unlock_irqrestore(&n->list_lock, flags);
  26583. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26584. return x;
  26585. }
  26586. #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
  26587. @@ -2305,9 +2349,11 @@
  26588. static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
  26589. unsigned long addr, struct kmem_cache_cpu *c)
  26590. {
  26591. + struct slub_free_list *f;
  26592. void *freelist;
  26593. struct page *page;
  26594. unsigned long flags;
  26595. + LIST_HEAD(tofree);
  26596. local_irq_save(flags);
  26597. #ifdef CONFIG_PREEMPT
  26598. @@ -2375,7 +2421,13 @@
  26599. VM_BUG_ON(!c->page->frozen);
  26600. c->freelist = get_freepointer(s, freelist);
  26601. c->tid = next_tid(c->tid);
  26602. +out:
  26603. + f = this_cpu_ptr(&slub_free_list);
  26604. + raw_spin_lock(&f->lock);
  26605. + list_splice_init(&f->list, &tofree);
  26606. + raw_spin_unlock(&f->lock);
  26607. local_irq_restore(flags);
  26608. + free_delayed(&tofree);
  26609. return freelist;
  26610. new_slab:
  26611. @@ -2392,8 +2444,7 @@
  26612. if (unlikely(!freelist)) {
  26613. slab_out_of_memory(s, gfpflags, node);
  26614. - local_irq_restore(flags);
  26615. - return NULL;
  26616. + goto out;
  26617. }
  26618. page = c->page;
  26619. @@ -2408,8 +2459,7 @@
  26620. deactivate_slab(s, page, get_freepointer(s, freelist));
  26621. c->page = NULL;
  26622. c->freelist = NULL;
  26623. - local_irq_restore(flags);
  26624. - return freelist;
  26625. + goto out;
  26626. }
  26627. /*
  26628. @@ -2593,7 +2643,7 @@
  26629. do {
  26630. if (unlikely(n)) {
  26631. - spin_unlock_irqrestore(&n->list_lock, flags);
  26632. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26633. n = NULL;
  26634. }
  26635. prior = page->freelist;
  26636. @@ -2625,7 +2675,7 @@
  26637. * Otherwise the list_lock will synchronize with
  26638. * other processors updating the list of slabs.
  26639. */
  26640. - spin_lock_irqsave(&n->list_lock, flags);
  26641. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26642. }
  26643. }
  26644. @@ -2667,7 +2717,7 @@
  26645. add_partial(n, page, DEACTIVATE_TO_TAIL);
  26646. stat(s, FREE_ADD_PARTIAL);
  26647. }
  26648. - spin_unlock_irqrestore(&n->list_lock, flags);
  26649. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26650. return;
  26651. slab_empty:
  26652. @@ -2682,7 +2732,7 @@
  26653. remove_full(s, n, page);
  26654. }
  26655. - spin_unlock_irqrestore(&n->list_lock, flags);
  26656. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26657. stat(s, FREE_SLAB);
  26658. discard_slab(s, page);
  26659. }
  26660. @@ -2881,7 +2931,7 @@
  26661. init_kmem_cache_node(struct kmem_cache_node *n)
  26662. {
  26663. n->nr_partial = 0;
  26664. - spin_lock_init(&n->list_lock);
  26665. + raw_spin_lock_init(&n->list_lock);
  26666. INIT_LIST_HEAD(&n->partial);
  26667. #ifdef CONFIG_SLUB_DEBUG
  26668. atomic_long_set(&n->nr_slabs, 0);
  26669. @@ -3463,7 +3513,7 @@
  26670. for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
  26671. INIT_LIST_HEAD(promote + i);
  26672. - spin_lock_irqsave(&n->list_lock, flags);
  26673. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26674. /*
  26675. * Build lists of slabs to discard or promote.
  26676. @@ -3494,7 +3544,7 @@
  26677. for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
  26678. list_splice(promote + i, &n->partial);
  26679. - spin_unlock_irqrestore(&n->list_lock, flags);
  26680. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26681. /* Release empty slabs */
  26682. list_for_each_entry_safe(page, t, &discard, lru)
  26683. @@ -3670,6 +3720,12 @@
  26684. {
  26685. static __initdata struct kmem_cache boot_kmem_cache,
  26686. boot_kmem_cache_node;
  26687. + int cpu;
  26688. +
  26689. + for_each_possible_cpu(cpu) {
  26690. + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
  26691. + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
  26692. + }
  26693. if (debug_guardpage_minorder())
  26694. slub_max_order = 0;
  26695. @@ -3912,7 +3968,7 @@
  26696. struct page *page;
  26697. unsigned long flags;
  26698. - spin_lock_irqsave(&n->list_lock, flags);
  26699. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26700. list_for_each_entry(page, &n->partial, lru) {
  26701. validate_slab_slab(s, page, map);
  26702. @@ -3934,7 +3990,7 @@
  26703. s->name, count, atomic_long_read(&n->nr_slabs));
  26704. out:
  26705. - spin_unlock_irqrestore(&n->list_lock, flags);
  26706. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26707. return count;
  26708. }
  26709. @@ -4122,12 +4178,12 @@
  26710. if (!atomic_long_read(&n->nr_slabs))
  26711. continue;
  26712. - spin_lock_irqsave(&n->list_lock, flags);
  26713. + raw_spin_lock_irqsave(&n->list_lock, flags);
  26714. list_for_each_entry(page, &n->partial, lru)
  26715. process_slab(&t, s, page, alloc, map);
  26716. list_for_each_entry(page, &n->full, lru)
  26717. process_slab(&t, s, page, alloc, map);
  26718. - spin_unlock_irqrestore(&n->list_lock, flags);
  26719. + raw_spin_unlock_irqrestore(&n->list_lock, flags);
  26720. }
  26721. for (i = 0; i < t.count; i++) {
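
The slub_free_list machinery added above is a deferral scheme: when a slab page has to be released while interrupts are disabled (where calling into the page allocator is problematic on -rt), the page is parked on a per-CPU list under a raw lock, and the exit paths later splice that list and run free_delayed() with interrupts enabled. A condensed sketch of the same idea, using __free_pages() as a stand-in for __free_slab() and assuming the per-CPU instances are initialised at boot much like the kmem_cache_init() hunk does:

struct deferred_free {
	raw_spinlock_t	lock;
	struct list_head list;
};
static DEFINE_PER_CPU(struct deferred_free, deferred_free);

static void defer_or_free(struct page *page)
{
	if (irqs_disabled()) {
		struct deferred_free *d = this_cpu_ptr(&deferred_free);

		raw_spin_lock(&d->lock);
		list_add(&page->lru, &d->list);
		raw_spin_unlock(&d->lock);
	} else {
		__free_pages(page, 0);		/* stand-in for __free_slab() */
	}
}

/* Caller is expected to be pinned to a CPU, as the slub paths are. */
static void flush_deferred_free(void)
{
	struct deferred_free *d = this_cpu_ptr(&deferred_free);
	struct page *page, *tmp;
	LIST_HEAD(tofree);

	raw_spin_lock_irq(&d->lock);
	list_splice_init(&d->list, &tofree);
	raw_spin_unlock_irq(&d->lock);

	list_for_each_entry_safe(page, tmp, &tofree, lru) {
		list_del(&page->lru);
		__free_pages(page, 0);
	}
}
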
  26722. diff -Nur linux-4.1.39.orig/mm/swap.c linux-4.1.39/mm/swap.c
  26723. --- linux-4.1.39.orig/mm/swap.c 2017-03-13 21:04:36.000000000 +0100
  26724. +++ linux-4.1.39/mm/swap.c 2017-04-18 17:56:30.653398681 +0200
  26725. @@ -32,6 +32,7 @@
  26726. #include <linux/gfp.h>
  26727. #include <linux/uio.h>
  26728. #include <linux/hugetlb.h>
  26729. +#include <linux/locallock.h>
  26730. #include "internal.h"
  26731. @@ -45,6 +46,9 @@
  26732. static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
  26733. static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
  26734. +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
  26735. +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
  26736. +
  26737. /*
  26738. * This path almost never happens for VM activity - pages are normally
  26739. * freed via pagevecs. But it gets used by networking.
  26740. @@ -481,11 +485,11 @@
  26741. unsigned long flags;
  26742. page_cache_get(page);
  26743. - local_irq_save(flags);
  26744. + local_lock_irqsave(rotate_lock, flags);
  26745. pvec = this_cpu_ptr(&lru_rotate_pvecs);
  26746. if (!pagevec_add(pvec, page) || PageCompound(page))
  26747. pagevec_move_tail(pvec);
  26748. - local_irq_restore(flags);
  26749. + local_unlock_irqrestore(rotate_lock, flags);
  26750. }
  26751. }
  26752. @@ -536,12 +540,13 @@
  26753. void activate_page(struct page *page)
  26754. {
  26755. if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  26756. - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  26757. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  26758. + activate_page_pvecs);
  26759. page_cache_get(page);
  26760. if (!pagevec_add(pvec, page) || PageCompound(page))
  26761. pagevec_lru_move_fn(pvec, __activate_page, NULL);
  26762. - put_cpu_var(activate_page_pvecs);
  26763. + put_locked_var(swapvec_lock, activate_page_pvecs);
  26764. }
  26765. }
  26766. @@ -567,7 +572,7 @@
  26767. static void __lru_cache_activate_page(struct page *page)
  26768. {
  26769. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  26770. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  26771. int i;
  26772. /*
  26773. @@ -589,7 +594,7 @@
  26774. }
  26775. }
  26776. - put_cpu_var(lru_add_pvec);
  26777. + put_locked_var(swapvec_lock, lru_add_pvec);
  26778. }
  26779. /*
  26780. @@ -628,12 +633,12 @@
  26781. static void __lru_cache_add(struct page *page)
  26782. {
  26783. - struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
  26784. + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
  26785. page_cache_get(page);
  26786. if (!pagevec_add(pvec, page) || PageCompound(page))
  26787. __pagevec_lru_add(pvec);
  26788. - put_cpu_var(lru_add_pvec);
  26789. + put_locked_var(swapvec_lock, lru_add_pvec);
  26790. }
  26791. /**
  26792. @@ -813,9 +818,15 @@
  26793. unsigned long flags;
  26794. /* No harm done if a racing interrupt already did this */
  26795. - local_irq_save(flags);
  26796. +#ifdef CONFIG_PREEMPT_RT_BASE
  26797. + local_lock_irqsave_on(rotate_lock, flags, cpu);
  26798. + pagevec_move_tail(pvec);
  26799. + local_unlock_irqrestore_on(rotate_lock, flags, cpu);
  26800. +#else
  26801. + local_lock_irqsave(rotate_lock, flags);
  26802. pagevec_move_tail(pvec);
  26803. - local_irq_restore(flags);
  26804. + local_unlock_irqrestore(rotate_lock, flags);
  26805. +#endif
  26806. }
  26807. pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
  26808. @@ -843,26 +854,47 @@
  26809. return;
  26810. if (likely(get_page_unless_zero(page))) {
  26811. - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
  26812. + struct pagevec *pvec = &get_locked_var(swapvec_lock,
  26813. + lru_deactivate_file_pvecs);
  26814. if (!pagevec_add(pvec, page) || PageCompound(page))
  26815. pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
  26816. - put_cpu_var(lru_deactivate_file_pvecs);
  26817. + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs);
  26818. }
  26819. }
  26820. void lru_add_drain(void)
  26821. {
  26822. - lru_add_drain_cpu(get_cpu());
  26823. - put_cpu();
  26824. + lru_add_drain_cpu(local_lock_cpu(swapvec_lock));
  26825. + local_unlock_cpu(swapvec_lock);
  26826. }
  26827. +
  26828. +#ifdef CONFIG_PREEMPT_RT_BASE
  26829. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  26830. +{
  26831. + local_lock_on(swapvec_lock, cpu);
  26832. + lru_add_drain_cpu(cpu);
  26833. + local_unlock_on(swapvec_lock, cpu);
  26834. +}
  26835. +
  26836. +#else
  26837. +
  26838. static void lru_add_drain_per_cpu(struct work_struct *dummy)
  26839. {
  26840. lru_add_drain();
  26841. }
  26842. static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
  26843. +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  26844. +{
  26845. + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  26846. +
  26847. + INIT_WORK(work, lru_add_drain_per_cpu);
  26848. + schedule_work_on(cpu, work);
  26849. + cpumask_set_cpu(cpu, has_work);
  26850. +}
  26851. +#endif
  26852. void lru_add_drain_all(void)
  26853. {
  26854. @@ -875,20 +907,17 @@
  26855. cpumask_clear(&has_work);
  26856. for_each_online_cpu(cpu) {
  26857. - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
  26858. -
  26859. if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  26860. pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  26861. pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  26862. - need_activate_page_drain(cpu)) {
  26863. - INIT_WORK(work, lru_add_drain_per_cpu);
  26864. - schedule_work_on(cpu, work);
  26865. - cpumask_set_cpu(cpu, &has_work);
  26866. - }
  26867. + need_activate_page_drain(cpu))
  26868. + remote_lru_add_drain(cpu, &has_work);
  26869. }
  26870. +#ifndef CONFIG_PREEMPT_RT_BASE
  26871. for_each_cpu(cpu, &has_work)
  26872. flush_work(&per_cpu(lru_add_drain_work, cpu));
  26873. +#endif
  26874. put_online_cpus();
  26875. mutex_unlock(&lock);
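The mm/swap.c hunks above replace get_cpu_var()/put_cpu_var(), which rely on disabling preemption, with get_locked_var()/put_locked_var() on a named local lock (swapvec_lock), so that on PREEMPT_RT the per-CPU pagevecs are protected by a per-CPU sleeping lock instead. Below is a minimal user-space sketch of that idea, not kernel code: the slot table, get_locked_pvec() and the batch size are all invented for illustration, with sched_getcpu() standing in for smp_processor_id().

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NSLOTS 4

struct pagevec_like {
	int nr;                       /* items batched so far */
};

static struct {
	pthread_mutex_t lock;         /* plays the role of swapvec_lock */
	struct pagevec_like pvec;     /* plays the role of lru_add_pvec */
} slot[NSLOTS];

/* get_locked_var() analogue: pick "this CPU's" slot and take its lock. */
static struct pagevec_like *get_locked_pvec(int *out_slot)
{
	int s = sched_getcpu();

	if (s < 0)
		s = 0;
	s %= NSLOTS;
	pthread_mutex_lock(&slot[s].lock);
	*out_slot = s;
	return &slot[s].pvec;
}

/* put_locked_var() analogue */
static void put_locked_pvec(int s)
{
	pthread_mutex_unlock(&slot[s].lock);
}

static void lru_cache_add_like(void)
{
	int s;
	struct pagevec_like *pvec = get_locked_pvec(&s);

	if (++pvec->nr >= 14) {       /* drain the batch once it is full */
		printf("slot %d: draining %d entries\n", s, pvec->nr);
		pvec->nr = 0;
	}
	put_locked_pvec(s);
}

int main(void)
{
	for (int i = 0; i < NSLOTS; i++)
		pthread_mutex_init(&slot[i].lock, NULL);
	for (int i = 0; i < 100; i++)
		lru_cache_add_like();
	return 0;
}

The point of the change: a task preempted while holding the slot lock only blocks other users of that one slot, whereas "protect by disabling preemption" is not available on RT, where almost everything must remain preemptible.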
  26876. diff -Nur linux-4.1.39.orig/mm/truncate.c linux-4.1.39/mm/truncate.c
  26877. --- linux-4.1.39.orig/mm/truncate.c 2017-03-13 21:04:36.000000000 +0100
  26878. +++ linux-4.1.39/mm/truncate.c 2017-04-18 17:56:30.653398681 +0200
  26879. @@ -56,8 +56,11 @@
  26880. * protected by mapping->tree_lock.
  26881. */
  26882. if (!workingset_node_shadows(node) &&
  26883. - !list_empty(&node->private_list))
  26884. - list_lru_del(&workingset_shadow_nodes, &node->private_list);
  26885. + !list_empty(&node->private_list)) {
  26886. + local_lock(workingset_shadow_lock);
  26887. + list_lru_del(&__workingset_shadow_nodes, &node->private_list);
  26888. + local_unlock(workingset_shadow_lock);
  26889. + }
  26890. __radix_tree_delete_node(&mapping->page_tree, node);
  26891. unlock:
  26892. spin_unlock_irq(&mapping->tree_lock);
  26893. diff -Nur linux-4.1.39.orig/mm/vmalloc.c linux-4.1.39/mm/vmalloc.c
  26894. --- linux-4.1.39.orig/mm/vmalloc.c 2017-03-13 21:04:36.000000000 +0100
  26895. +++ linux-4.1.39/mm/vmalloc.c 2017-04-18 17:56:30.653398681 +0200
  26896. @@ -819,7 +819,7 @@
  26897. struct vmap_block *vb;
  26898. struct vmap_area *va;
  26899. unsigned long vb_idx;
  26900. - int node, err;
  26901. + int node, err, cpu;
  26902. void *vaddr;
  26903. node = numa_node_id();
  26904. @@ -862,11 +862,12 @@
  26905. BUG_ON(err);
  26906. radix_tree_preload_end();
  26907. - vbq = &get_cpu_var(vmap_block_queue);
  26908. + cpu = get_cpu_light();
  26909. + vbq = this_cpu_ptr(&vmap_block_queue);
  26910. spin_lock(&vbq->lock);
  26911. list_add_tail_rcu(&vb->free_list, &vbq->free);
  26912. spin_unlock(&vbq->lock);
  26913. - put_cpu_var(vmap_block_queue);
  26914. + put_cpu_light();
  26915. return vaddr;
  26916. }
  26917. @@ -935,6 +936,7 @@
  26918. struct vmap_block *vb;
  26919. void *vaddr = NULL;
  26920. unsigned int order;
  26921. + int cpu;
  26922. BUG_ON(size & ~PAGE_MASK);
  26923. BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
  26924. @@ -949,7 +951,8 @@
  26925. order = get_order(size);
  26926. rcu_read_lock();
  26927. - vbq = &get_cpu_var(vmap_block_queue);
  26928. + cpu = get_cpu_light();
  26929. + vbq = this_cpu_ptr(&vmap_block_queue);
  26930. list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  26931. unsigned long pages_off;
  26932. @@ -972,7 +975,7 @@
  26933. break;
  26934. }
  26935. - put_cpu_var(vmap_block_queue);
  26936. + put_cpu_light();
  26937. rcu_read_unlock();
  26938. /* Allocate new block if nothing was found */
  26939. diff -Nur linux-4.1.39.orig/mm/vmstat.c linux-4.1.39/mm/vmstat.c
  26940. --- linux-4.1.39.orig/mm/vmstat.c 2017-03-13 21:04:36.000000000 +0100
  26941. +++ linux-4.1.39/mm/vmstat.c 2017-04-18 17:56:30.653398681 +0200
  26942. @@ -226,6 +226,7 @@
  26943. long x;
  26944. long t;
  26945. + preempt_disable_rt();
  26946. x = delta + __this_cpu_read(*p);
  26947. t = __this_cpu_read(pcp->stat_threshold);
  26948. @@ -235,6 +236,7 @@
  26949. x = 0;
  26950. }
  26951. __this_cpu_write(*p, x);
  26952. + preempt_enable_rt();
  26953. }
  26954. EXPORT_SYMBOL(__mod_zone_page_state);
  26955. @@ -267,6 +269,7 @@
  26956. s8 __percpu *p = pcp->vm_stat_diff + item;
  26957. s8 v, t;
  26958. + preempt_disable_rt();
  26959. v = __this_cpu_inc_return(*p);
  26960. t = __this_cpu_read(pcp->stat_threshold);
  26961. if (unlikely(v > t)) {
  26962. @@ -275,6 +278,7 @@
  26963. zone_page_state_add(v + overstep, zone, item);
  26964. __this_cpu_write(*p, -overstep);
  26965. }
  26966. + preempt_enable_rt();
  26967. }
  26968. void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
  26969. @@ -289,6 +293,7 @@
  26970. s8 __percpu *p = pcp->vm_stat_diff + item;
  26971. s8 v, t;
  26972. + preempt_disable_rt();
  26973. v = __this_cpu_dec_return(*p);
  26974. t = __this_cpu_read(pcp->stat_threshold);
  26975. if (unlikely(v < - t)) {
  26976. @@ -297,6 +302,7 @@
  26977. zone_page_state_add(v - overstep, zone, item);
  26978. __this_cpu_write(*p, overstep);
  26979. }
  26980. + preempt_enable_rt();
  26981. }
  26982. void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
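The mm/vmstat.c hunks wrap the per-CPU counter fast path in preempt_disable_rt()/preempt_enable_rt(): on RT the caller could otherwise be preempted between reading, comparing and writing the per-CPU differential, corrupting it. The following user-space sketch shows the same read-modify-flush pattern; the mutex stands in for the RT preemption disable and every name here is invented for illustration, not taken from the kernel.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_long zone_state;                 /* the shared, global counter */
static long cpu_diff;                          /* per-CPU differential (one slot shown) */
static const long stat_threshold = 32;
static pthread_mutex_t diff_lock = PTHREAD_MUTEX_INITIALIZER;

static void mod_state(long delta)
{
	long x;

	pthread_mutex_lock(&diff_lock);        /* preempt_disable_rt() stand-in */
	x = cpu_diff + delta;
	if (x > stat_threshold || x < -stat_threshold) {
		atomic_fetch_add(&zone_state, x);  /* fold into the global counter */
		x = 0;
	}
	cpu_diff = x;                          /* keep the remainder "per CPU" */
	pthread_mutex_unlock(&diff_lock);      /* preempt_enable_rt() stand-in */
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		mod_state(1);
	printf("global=%ld pending=%ld\n",
	       (long)atomic_load(&zone_state), cpu_diff);
	return 0;
}

Without the lock (or, in the kernel, without disabling preemption/RT-preemption), two updates interleaving between the read of cpu_diff and the write back would silently lose counts.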
  26983. diff -Nur linux-4.1.39.orig/mm/workingset.c linux-4.1.39/mm/workingset.c
  26984. --- linux-4.1.39.orig/mm/workingset.c 2017-03-13 21:04:36.000000000 +0100
  26985. +++ linux-4.1.39/mm/workingset.c 2017-04-18 17:56:30.653398681 +0200
  26986. @@ -264,7 +264,8 @@
  26987. * point where they would still be useful.
  26988. */
  26989. -struct list_lru workingset_shadow_nodes;
  26990. +struct list_lru __workingset_shadow_nodes;
  26991. +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock);
  26992. static unsigned long count_shadow_nodes(struct shrinker *shrinker,
  26993. struct shrink_control *sc)
  26994. @@ -274,9 +275,9 @@
  26995. unsigned long pages;
  26996. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  26997. - local_irq_disable();
  26998. - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
  26999. - local_irq_enable();
  27000. + local_lock_irq(workingset_shadow_lock);
  27001. + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
  27002. + local_unlock_irq(workingset_shadow_lock);
  27003. pages = node_present_pages(sc->nid);
  27004. /*
  27005. @@ -363,9 +364,9 @@
  27006. spin_unlock(&mapping->tree_lock);
  27007. ret = LRU_REMOVED_RETRY;
  27008. out:
  27009. - local_irq_enable();
  27010. + local_unlock_irq(workingset_shadow_lock);
  27011. cond_resched();
  27012. - local_irq_disable();
  27013. + local_lock_irq(workingset_shadow_lock);
  27014. spin_lock(lru_lock);
  27015. return ret;
  27016. }
  27017. @@ -376,10 +377,10 @@
  27018. unsigned long ret;
  27019. /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
  27020. - local_irq_disable();
  27021. - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
  27022. + local_lock_irq(workingset_shadow_lock);
  27023. + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc,
  27024. shadow_lru_isolate, NULL);
  27025. - local_irq_enable();
  27026. + local_unlock_irq(workingset_shadow_lock);
  27027. return ret;
  27028. }
  27029. @@ -400,7 +401,7 @@
  27030. {
  27031. int ret;
  27032. - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
  27033. + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key);
  27034. if (ret)
  27035. goto err;
  27036. ret = register_shrinker(&workingset_shadow_shrinker);
  27037. @@ -408,7 +409,7 @@
  27038. goto err_list_lru;
  27039. return 0;
  27040. err_list_lru:
  27041. - list_lru_destroy(&workingset_shadow_nodes);
  27042. + list_lru_destroy(&__workingset_shadow_nodes);
  27043. err:
  27044. return ret;
  27045. }
  27046. diff -Nur linux-4.1.39.orig/mm/zsmalloc.c linux-4.1.39/mm/zsmalloc.c
  27047. --- linux-4.1.39.orig/mm/zsmalloc.c 2017-03-13 21:04:36.000000000 +0100
  27048. +++ linux-4.1.39/mm/zsmalloc.c 2017-04-18 17:56:30.653398681 +0200
  27049. @@ -68,6 +68,7 @@
  27050. #include <linux/debugfs.h>
  27051. #include <linux/zsmalloc.h>
  27052. #include <linux/zpool.h>
  27053. +#include <linux/locallock.h>
  27054. /*
  27055. * This must be power of 2 and greater than of equal to sizeof(link_free).
  27056. @@ -398,6 +399,7 @@
  27057. /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
  27058. static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
  27059. +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock);
  27060. static int is_first_page(struct page *page)
  27061. {
  27062. @@ -1289,7 +1291,7 @@
  27063. class = pool->size_class[class_idx];
  27064. off = obj_idx_to_offset(page, obj_idx, class->size);
  27065. - area = &get_cpu_var(zs_map_area);
  27066. + area = &get_locked_var(zs_map_area_lock, zs_map_area);
  27067. area->vm_mm = mm;
  27068. if (off + class->size <= PAGE_SIZE) {
  27069. /* this object is contained entirely within a page */
  27070. @@ -1342,7 +1344,7 @@
  27071. __zs_unmap_object(area, pages, off, class->size);
  27072. }
  27073. - put_cpu_var(zs_map_area);
  27074. + put_locked_var(zs_map_area_lock, zs_map_area);
  27075. unpin_tag(handle);
  27076. }
  27077. EXPORT_SYMBOL_GPL(zs_unmap_object);
  27078. diff -Nur linux-4.1.39.orig/net/core/dev.c linux-4.1.39/net/core/dev.c
  27079. --- linux-4.1.39.orig/net/core/dev.c 2017-03-13 21:04:36.000000000 +0100
  27080. +++ linux-4.1.39/net/core/dev.c 2017-04-18 17:56:30.653398681 +0200
  27081. @@ -184,6 +184,7 @@
  27082. static DEFINE_HASHTABLE(napi_hash, 8);
  27083. static seqcount_t devnet_rename_seq;
  27084. +static DEFINE_MUTEX(devnet_rename_mutex);
  27085. static inline void dev_base_seq_inc(struct net *net)
  27086. {
  27087. @@ -205,14 +206,14 @@
  27088. static inline void rps_lock(struct softnet_data *sd)
  27089. {
  27090. #ifdef CONFIG_RPS
  27091. - spin_lock(&sd->input_pkt_queue.lock);
  27092. + raw_spin_lock(&sd->input_pkt_queue.raw_lock);
  27093. #endif
  27094. }
  27095. static inline void rps_unlock(struct softnet_data *sd)
  27096. {
  27097. #ifdef CONFIG_RPS
  27098. - spin_unlock(&sd->input_pkt_queue.lock);
  27099. + raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
  27100. #endif
  27101. }
  27102. @@ -852,7 +853,8 @@
  27103. strcpy(name, dev->name);
  27104. rcu_read_unlock();
  27105. if (read_seqcount_retry(&devnet_rename_seq, seq)) {
  27106. - cond_resched();
  27107. + mutex_lock(&devnet_rename_mutex);
  27108. + mutex_unlock(&devnet_rename_mutex);
  27109. goto retry;
  27110. }
  27111. @@ -1121,20 +1123,17 @@
  27112. if (dev->flags & IFF_UP)
  27113. return -EBUSY;
  27114. - write_seqcount_begin(&devnet_rename_seq);
  27115. + mutex_lock(&devnet_rename_mutex);
  27116. + __raw_write_seqcount_begin(&devnet_rename_seq);
  27117. - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
  27118. - write_seqcount_end(&devnet_rename_seq);
  27119. - return 0;
  27120. - }
  27121. + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
  27122. + goto outunlock;
  27123. memcpy(oldname, dev->name, IFNAMSIZ);
  27124. err = dev_get_valid_name(net, dev, newname);
  27125. - if (err < 0) {
  27126. - write_seqcount_end(&devnet_rename_seq);
  27127. - return err;
  27128. - }
  27129. + if (err < 0)
  27130. + goto outunlock;
  27131. if (oldname[0] && !strchr(oldname, '%'))
  27132. netdev_info(dev, "renamed from %s\n", oldname);
  27133. @@ -1147,11 +1146,12 @@
  27134. if (ret) {
  27135. memcpy(dev->name, oldname, IFNAMSIZ);
  27136. dev->name_assign_type = old_assign_type;
  27137. - write_seqcount_end(&devnet_rename_seq);
  27138. - return ret;
  27139. + err = ret;
  27140. + goto outunlock;
  27141. }
  27142. - write_seqcount_end(&devnet_rename_seq);
  27143. + __raw_write_seqcount_end(&devnet_rename_seq);
  27144. + mutex_unlock(&devnet_rename_mutex);
  27145. netdev_adjacent_rename_links(dev, oldname);
  27146. @@ -1172,7 +1172,8 @@
  27147. /* err >= 0 after dev_alloc_name() or stores the first errno */
  27148. if (err >= 0) {
  27149. err = ret;
  27150. - write_seqcount_begin(&devnet_rename_seq);
  27151. + mutex_lock(&devnet_rename_mutex);
  27152. + __raw_write_seqcount_begin(&devnet_rename_seq);
  27153. memcpy(dev->name, oldname, IFNAMSIZ);
  27154. memcpy(oldname, newname, IFNAMSIZ);
  27155. dev->name_assign_type = old_assign_type;
  27156. @@ -1185,6 +1186,11 @@
  27157. }
  27158. return err;
  27159. +
  27160. +outunlock:
  27161. + __raw_write_seqcount_end(&devnet_rename_seq);
  27162. + mutex_unlock(&devnet_rename_mutex);
  27163. + return err;
  27164. }
  27165. /**
  27166. @@ -2214,6 +2220,7 @@
  27167. sd->output_queue_tailp = &q->next_sched;
  27168. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  27169. local_irq_restore(flags);
  27170. + preempt_check_resched_rt();
  27171. }
  27172. void __netif_schedule(struct Qdisc *q)
  27173. @@ -2295,6 +2302,7 @@
  27174. __this_cpu_write(softnet_data.completion_queue, skb);
  27175. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  27176. local_irq_restore(flags);
  27177. + preempt_check_resched_rt();
  27178. }
  27179. EXPORT_SYMBOL(__dev_kfree_skb_irq);
  27180. @@ -2820,7 +2828,11 @@
  27181. * This permits __QDISC___STATE_RUNNING owner to get the lock more
  27182. * often and dequeue packets faster.
  27183. */
  27184. +#ifdef CONFIG_PREEMPT_RT_FULL
  27185. + contended = true;
  27186. +#else
  27187. contended = qdisc_is_running(q);
  27188. +#endif
  27189. if (unlikely(contended))
  27190. spin_lock(&q->busylock);
  27191. @@ -2880,9 +2892,44 @@
  27192. #define skb_update_prio(skb)
  27193. #endif
  27194. +#ifdef CONFIG_PREEMPT_RT_FULL
  27195. +
  27196. +static inline int xmit_rec_read(void)
  27197. +{
  27198. + return current->xmit_recursion;
  27199. +}
  27200. +
  27201. +static inline void xmit_rec_inc(void)
  27202. +{
  27203. + current->xmit_recursion++;
  27204. +}
  27205. +
  27206. +static inline void xmit_rec_dec(void)
  27207. +{
  27208. + current->xmit_recursion--;
  27209. +}
  27210. +
  27211. +#else
  27212. +
  27213. DEFINE_PER_CPU(int, xmit_recursion);
  27214. EXPORT_SYMBOL(xmit_recursion);
  27215. +static inline int xmit_rec_read(void)
  27216. +{
  27217. + return __this_cpu_read(xmit_recursion);
  27218. +}
  27219. +
  27220. +static inline void xmit_rec_inc(void)
  27221. +{
  27222. + __this_cpu_inc(xmit_recursion);
  27223. +}
  27224. +
  27225. +static inline void xmit_rec_dec(void)
  27226. +{
  27227. + __this_cpu_dec(xmit_recursion);
  27228. +}
  27229. +#endif
  27230. +
  27231. #define RECURSION_LIMIT 10
  27232. /**
  27233. @@ -2984,7 +3031,7 @@
  27234. if (txq->xmit_lock_owner != cpu) {
  27235. - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
  27236. + if (xmit_rec_read() > RECURSION_LIMIT)
  27237. goto recursion_alert;
  27238. skb = validate_xmit_skb(skb, dev);
  27239. @@ -2994,9 +3041,9 @@
  27240. HARD_TX_LOCK(dev, txq, cpu);
  27241. if (!netif_xmit_stopped(txq)) {
  27242. - __this_cpu_inc(xmit_recursion);
  27243. + xmit_rec_inc();
  27244. skb = dev_hard_start_xmit(skb, dev, txq, &rc);
  27245. - __this_cpu_dec(xmit_recursion);
  27246. + xmit_rec_dec();
  27247. if (dev_xmit_complete(rc)) {
  27248. HARD_TX_UNLOCK(dev, txq);
  27249. goto out;
  27250. @@ -3370,6 +3417,7 @@
  27251. rps_unlock(sd);
  27252. local_irq_restore(flags);
  27253. + preempt_check_resched_rt();
  27254. atomic_long_inc(&skb->dev->rx_dropped);
  27255. kfree_skb(skb);
  27256. @@ -3388,7 +3436,7 @@
  27257. struct rps_dev_flow voidflow, *rflow = &voidflow;
  27258. int cpu;
  27259. - preempt_disable();
  27260. + migrate_disable();
  27261. rcu_read_lock();
  27262. cpu = get_rps_cpu(skb->dev, skb, &rflow);
  27263. @@ -3398,13 +3446,13 @@
  27264. ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  27265. rcu_read_unlock();
  27266. - preempt_enable();
  27267. + migrate_enable();
  27268. } else
  27269. #endif
  27270. {
  27271. unsigned int qtail;
  27272. - ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
  27273. - put_cpu();
  27274. + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
  27275. + put_cpu_light();
  27276. }
  27277. return ret;
  27278. }
  27279. @@ -3438,16 +3486,44 @@
  27280. trace_netif_rx_ni_entry(skb);
  27281. - preempt_disable();
  27282. + local_bh_disable();
  27283. err = netif_rx_internal(skb);
  27284. - if (local_softirq_pending())
  27285. - do_softirq();
  27286. - preempt_enable();
  27287. + local_bh_enable();
  27288. return err;
  27289. }
  27290. EXPORT_SYMBOL(netif_rx_ni);
  27291. +#ifdef CONFIG_PREEMPT_RT_FULL
  27292. +/*
  27293. + * RT runs ksoftirqd as a real time thread and the root_lock is a
  27294. + * "sleeping spinlock". If the trylock fails then we can go into an
  27295. + * infinite loop when ksoftirqd preempted the task which actually
  27296. + * holds the lock, because we requeue q and raise NET_TX softirq
  27297. + * causing ksoftirqd to loop forever.
  27298. + *
  27299. + * It's safe to use spin_lock on RT here as softirqs run in thread
  27300. + * context and cannot deadlock against the thread which is holding
  27301. + * root_lock.
  27302. + *
  27303. + * On !RT the trylock might fail, but there we bail out from the
  27304. + * softirq loop after 10 attempts which we can't do on RT. And the
  27305. + * task holding root_lock cannot be preempted, so the only downside of
  27306. + * that trylock is that we need 10 loops to decide that we should have
  27307. + * given up in the first one :)
  27308. + */
  27309. +static inline int take_root_lock(spinlock_t *lock)
  27310. +{
  27311. + spin_lock(lock);
  27312. + return 1;
  27313. +}
  27314. +#else
  27315. +static inline int take_root_lock(spinlock_t *lock)
  27316. +{
  27317. + return spin_trylock(lock);
  27318. +}
  27319. +#endif
  27320. +
  27321. static void net_tx_action(struct softirq_action *h)
  27322. {
  27323. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  27324. @@ -3489,7 +3565,7 @@
  27325. head = head->next_sched;
  27326. root_lock = qdisc_lock(q);
  27327. - if (spin_trylock(root_lock)) {
  27328. + if (take_root_lock(root_lock)) {
  27329. smp_mb__before_atomic();
  27330. clear_bit(__QDISC_STATE_SCHED,
  27331. &q->state);
  27332. @@ -3886,7 +3962,7 @@
  27333. skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
  27334. if (skb->dev == dev) {
  27335. __skb_unlink(skb, &sd->input_pkt_queue);
  27336. - kfree_skb(skb);
  27337. + __skb_queue_tail(&sd->tofree_queue, skb);
  27338. input_queue_head_incr(sd);
  27339. }
  27340. }
  27341. @@ -3895,10 +3971,13 @@
  27342. skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
  27343. if (skb->dev == dev) {
  27344. __skb_unlink(skb, &sd->process_queue);
  27345. - kfree_skb(skb);
  27346. + __skb_queue_tail(&sd->tofree_queue, skb);
  27347. input_queue_head_incr(sd);
  27348. }
  27349. }
  27350. +
  27351. + if (!skb_queue_empty(&sd->tofree_queue))
  27352. + raise_softirq_irqoff(NET_RX_SOFTIRQ);
  27353. }
  27354. static int napi_gro_complete(struct sk_buff *skb)
  27355. @@ -4350,6 +4429,7 @@
  27356. sd->rps_ipi_list = NULL;
  27357. local_irq_enable();
  27358. + preempt_check_resched_rt();
  27359. /* Send pending IPI's to kick RPS processing on remote cpus. */
  27360. while (remsd) {
  27361. @@ -4363,6 +4443,7 @@
  27362. } else
  27363. #endif
  27364. local_irq_enable();
  27365. + preempt_check_resched_rt();
  27366. }
  27367. static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
  27368. @@ -4444,9 +4525,11 @@
  27369. local_irq_save(flags);
  27370. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  27371. local_irq_restore(flags);
  27372. + preempt_check_resched_rt();
  27373. }
  27374. EXPORT_SYMBOL(__napi_schedule);
  27375. +#ifndef CONFIG_PREEMPT_RT_FULL
  27376. /**
  27377. * __napi_schedule_irqoff - schedule for receive
  27378. * @n: entry to schedule
  27379. @@ -4458,6 +4541,7 @@
  27380. ____napi_schedule(this_cpu_ptr(&softnet_data), n);
  27381. }
  27382. EXPORT_SYMBOL(__napi_schedule_irqoff);
  27383. +#endif
  27384. void __napi_complete(struct napi_struct *n)
  27385. {
  27386. @@ -4682,13 +4766,21 @@
  27387. struct softnet_data *sd = this_cpu_ptr(&softnet_data);
  27388. unsigned long time_limit = jiffies + 2;
  27389. int budget = netdev_budget;
  27390. + struct sk_buff_head tofree_q;
  27391. + struct sk_buff *skb;
  27392. LIST_HEAD(list);
  27393. LIST_HEAD(repoll);
  27394. + __skb_queue_head_init(&tofree_q);
  27395. +
  27396. local_irq_disable();
  27397. + skb_queue_splice_init(&sd->tofree_queue, &tofree_q);
  27398. list_splice_init(&sd->poll_list, &list);
  27399. local_irq_enable();
  27400. + while ((skb = __skb_dequeue(&tofree_q)))
  27401. + kfree_skb(skb);
  27402. +
  27403. for (;;) {
  27404. struct napi_struct *n;
  27405. @@ -4718,7 +4810,7 @@
  27406. list_splice_tail(&repoll, &list);
  27407. list_splice(&list, &sd->poll_list);
  27408. if (!list_empty(&sd->poll_list))
  27409. - __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  27410. + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ);
  27411. net_rps_action_and_irq_enable(sd);
  27412. }
  27413. @@ -6932,7 +7024,7 @@
  27414. void synchronize_net(void)
  27415. {
  27416. might_sleep();
  27417. - if (rtnl_is_locked())
  27418. + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
  27419. synchronize_rcu_expedited();
  27420. else
  27421. synchronize_rcu();
  27422. @@ -7173,16 +7265,20 @@
  27423. raise_softirq_irqoff(NET_TX_SOFTIRQ);
  27424. local_irq_enable();
  27425. + preempt_check_resched_rt();
  27426. /* Process offline CPU's input_pkt_queue */
  27427. while ((skb = __skb_dequeue(&oldsd->process_queue))) {
  27428. netif_rx_ni(skb);
  27429. input_queue_head_incr(oldsd);
  27430. }
  27431. - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
  27432. + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
  27433. netif_rx_ni(skb);
  27434. input_queue_head_incr(oldsd);
  27435. }
  27436. + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
  27437. + kfree_skb(skb);
  27438. + }
  27439. return NOTIFY_OK;
  27440. }
  27441. @@ -7484,8 +7580,9 @@
  27442. for_each_possible_cpu(i) {
  27443. struct softnet_data *sd = &per_cpu(softnet_data, i);
  27444. - skb_queue_head_init(&sd->input_pkt_queue);
  27445. - skb_queue_head_init(&sd->process_queue);
  27446. + skb_queue_head_init_raw(&sd->input_pkt_queue);
  27447. + skb_queue_head_init_raw(&sd->process_queue);
  27448. + skb_queue_head_init_raw(&sd->tofree_queue);
  27449. INIT_LIST_HEAD(&sd->poll_list);
  27450. sd->output_queue_tailp = &sd->output_queue;
  27451. #ifdef CONFIG_RPS
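Among the net/core/dev.c changes above, the device-rename hunks replace the bare write_seqcount_begin()/end() with a devnet_rename_mutex around __raw_write_seqcount_begin()/end(), and the reader that detects a retry now does mutex_lock()/mutex_unlock() instead of cond_resched(): on RT it sleeps until the (possibly preempted) writer is done rather than spinning against it. Here is a compact user-space sketch of that reader/writer scheme, using C11 atomics for the sequence counter and a pthread mutex for the write side; it is an illustration under invented names, not the kernel API, and a production seqlock would also have to tolerate torn reads of the protected data.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

static _Atomic unsigned seq;              /* even = stable, odd = write in progress */
static pthread_mutex_t rename_mutex = PTHREAD_MUTEX_INITIALIZER;
static char dev_name[16] = "eth0";

static void rename_dev(const char *newname)
{
	pthread_mutex_lock(&rename_mutex);
	atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* begin: odd */
	strncpy(dev_name, newname, sizeof(dev_name) - 1);
	atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* end: even again */
	pthread_mutex_unlock(&rename_mutex);
}

static void read_name(char *buf, size_t len)
{
	for (;;) {
		unsigned s = atomic_load_explicit(&seq, memory_order_acquire);

		if (s & 1)
			goto wait;                /* writer active right now */
		strncpy(buf, dev_name, len - 1);
		buf[len - 1] = '\0';
		if (atomic_load_explicit(&seq, memory_order_acquire) == s)
			return;                   /* no writer slipped in: name is consistent */
wait:
		/* The RT trick: instead of spinning or cond_resched(), wait for
		 * the writer by taking and releasing the mutex it holds. */
		pthread_mutex_lock(&rename_mutex);
		pthread_mutex_unlock(&rename_mutex);
	}
}

int main(void)
{
	char buf[16];

	rename_dev("wan0");
	read_name(buf, sizeof(buf));
	printf("%s\n", buf);
	return 0;
}

The mutex_lock()/mutex_unlock() pair in the retry path is deliberately "empty": its only purpose is to block the reader until the writer drops the mutex, which is safe on RT where the reader might otherwise preempt the writer forever.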
  27452. diff -Nur linux-4.1.39.orig/net/core/skbuff.c linux-4.1.39/net/core/skbuff.c
  27453. --- linux-4.1.39.orig/net/core/skbuff.c 2017-03-13 21:04:36.000000000 +0100
  27454. +++ linux-4.1.39/net/core/skbuff.c 2017-04-18 17:56:30.657398836 +0200
  27455. @@ -63,6 +63,7 @@
  27456. #include <linux/errqueue.h>
  27457. #include <linux/prefetch.h>
  27458. #include <linux/if_vlan.h>
  27459. +#include <linux/locallock.h>
  27460. #include <net/protocol.h>
  27461. #include <net/dst.h>
  27462. @@ -358,6 +359,8 @@
  27463. };
  27464. static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
  27465. static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
  27466. +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
  27467. +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
  27468. static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
  27469. gfp_t gfp_mask)
  27470. @@ -435,9 +438,9 @@
  27471. unsigned long flags;
  27472. void *data;
  27473. - local_irq_save(flags);
  27474. + local_lock_irqsave(netdev_alloc_lock, flags);
  27475. data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
  27476. - local_irq_restore(flags);
  27477. + local_unlock_irqrestore(netdev_alloc_lock, flags);
  27478. return data;
  27479. }
  27480. @@ -456,7 +459,12 @@
  27481. static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  27482. {
  27483. - return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
  27484. + void *data;
  27485. +
  27486. + local_lock(napi_alloc_cache_lock);
  27487. + data = __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
  27488. + local_unlock(napi_alloc_cache_lock);
  27489. + return data;
  27490. }
  27491. void *napi_alloc_frag(unsigned int fragsz)
  27492. diff -Nur linux-4.1.39.orig/net/core/sock.c linux-4.1.39/net/core/sock.c
  27493. --- linux-4.1.39.orig/net/core/sock.c 2017-03-13 21:04:36.000000000 +0100
  27494. +++ linux-4.1.39/net/core/sock.c 2017-04-18 17:56:30.657398836 +0200
  27495. @@ -2369,12 +2369,11 @@
  27496. if (sk->sk_lock.owned)
  27497. __lock_sock(sk);
  27498. sk->sk_lock.owned = 1;
  27499. - spin_unlock(&sk->sk_lock.slock);
  27500. + spin_unlock_bh(&sk->sk_lock.slock);
  27501. /*
  27502. * The sk_lock has mutex_lock() semantics here:
  27503. */
  27504. mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
  27505. - local_bh_enable();
  27506. }
  27507. EXPORT_SYMBOL(lock_sock_nested);
  27508. diff -Nur linux-4.1.39.orig/net/ipv4/icmp.c linux-4.1.39/net/ipv4/icmp.c
  27509. --- linux-4.1.39.orig/net/ipv4/icmp.c 2017-03-13 21:04:36.000000000 +0100
  27510. +++ linux-4.1.39/net/ipv4/icmp.c 2017-04-18 17:56:30.657398836 +0200
  27511. @@ -69,6 +69,7 @@
  27512. #include <linux/jiffies.h>
  27513. #include <linux/kernel.h>
  27514. #include <linux/fcntl.h>
  27515. +#include <linux/sysrq.h>
  27516. #include <linux/socket.h>
  27517. #include <linux/in.h>
  27518. #include <linux/inet.h>
  27519. @@ -77,6 +78,7 @@
  27520. #include <linux/string.h>
  27521. #include <linux/netfilter_ipv4.h>
  27522. #include <linux/slab.h>
  27523. +#include <linux/locallock.h>
  27524. #include <net/snmp.h>
  27525. #include <net/ip.h>
  27526. #include <net/route.h>
  27527. @@ -203,6 +205,8 @@
  27528. *
  27529. * On SMP we have one ICMP socket per-cpu.
  27530. */
  27531. +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
  27532. +
  27533. static struct sock *icmp_sk(struct net *net)
  27534. {
  27535. return *this_cpu_ptr(net->ipv4.icmp_sk);
  27536. @@ -214,12 +218,14 @@
  27537. local_bh_disable();
  27538. + local_lock(icmp_sk_lock);
  27539. sk = icmp_sk(net);
  27540. if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
  27541. /* This can happen if the output path signals a
  27542. * dst_link_failure() for an outgoing ICMP packet.
  27543. */
  27544. + local_unlock(icmp_sk_lock);
  27545. local_bh_enable();
  27546. return NULL;
  27547. }
  27548. @@ -229,6 +235,7 @@
  27549. static inline void icmp_xmit_unlock(struct sock *sk)
  27550. {
  27551. spin_unlock_bh(&sk->sk_lock.slock);
  27552. + local_unlock(icmp_sk_lock);
  27553. }
  27554. int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
  27555. @@ -356,6 +363,7 @@
  27556. struct sock *sk;
  27557. struct sk_buff *skb;
  27558. + local_lock(icmp_sk_lock);
  27559. sk = icmp_sk(dev_net((*rt)->dst.dev));
  27560. if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
  27561. icmp_param->data_len+icmp_param->head_len,
  27562. @@ -378,6 +386,7 @@
  27563. skb->ip_summed = CHECKSUM_NONE;
  27564. ip_push_pending_frames(sk, fl4);
  27565. }
  27566. + local_unlock(icmp_sk_lock);
  27567. }
  27568. /*
  27569. @@ -867,6 +876,30 @@
  27570. }
  27571. /*
  27572. + * 32bit and 64bit have different timestamp length, so we check for
  27573. + * the cookie at offset 20 and verify it is repeated at offset 50
  27574. + */
  27575. +#define CO_POS0 20
  27576. +#define CO_POS1 50
  27577. +#define CO_SIZE sizeof(int)
  27578. +#define ICMP_SYSRQ_SIZE 57
  27579. +
  27580. +/*
  27581. + * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
  27582. + * pattern and if it matches send the next byte as a trigger to sysrq.
  27583. + */
  27584. +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
  27585. +{
  27586. + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
  27587. + char *p = skb->data;
  27588. +
  27589. + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
  27590. + !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
  27591. + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
  27592. + handle_sysrq(p[CO_POS0 + CO_SIZE]);
  27593. +}
  27594. +
  27595. +/*
  27596. * Handle ICMP_ECHO ("ping") requests.
  27597. *
  27598. * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
  27599. @@ -893,6 +926,11 @@
  27600. icmp_param.data_len = skb->len;
  27601. icmp_param.head_len = sizeof(struct icmphdr);
  27602. icmp_reply(&icmp_param, skb);
  27603. +
  27604. + if (skb->len == ICMP_SYSRQ_SIZE &&
  27605. + net->ipv4.sysctl_icmp_echo_sysrq) {
  27606. + icmp_check_sysrq(net, skb);
  27607. + }
  27608. }
  27609. /* should there be an ICMP stat for ignored echos? */
  27610. return true;
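The net/ipv4/icmp.c hunk above adds an icmp_echo_sysrq trigger: an echo request whose payload is exactly ICMP_SYSRQ_SIZE (57) bytes, carries the configured cookie at offsets 20 and 50 of the data the kernel inspects, and places the sysrq key byte right after each cookie copy. The snippet below only lays out such a buffer to make the format concrete; it is an illustration, it does not send anything, and configuring net.ipv4.icmp_echo_sysrq is out of scope here. The kernel compares sizeof(int) bytes; sizeof(uint32_t) is used below on the assumption that both are 4 bytes on the targets this patch cares about.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CO_POS0         20
#define CO_POS1         50
#define CO_SIZE         sizeof(uint32_t)
#define ICMP_SYSRQ_SIZE 57

static void build_sysrq_payload(unsigned char *buf, uint32_t cookie, char key)
{
	uint32_t be_cookie = htonl(cookie);  /* the kernel compares htonl(sysctl value) */

	memset(buf, 0, ICMP_SYSRQ_SIZE);
	memcpy(buf + CO_POS0, &be_cookie, CO_SIZE);
	memcpy(buf + CO_POS1, &be_cookie, CO_SIZE);
	buf[CO_POS0 + CO_SIZE] = key;        /* key must follow the cookie ... */
	buf[CO_POS1 + CO_SIZE] = key;        /* ... and match at both positions */
}

int main(void)
{
	unsigned char payload[ICMP_SYSRQ_SIZE];

	build_sysrq_payload(payload, 0x01020304, 'h');   /* 'h': sysrq help */
	for (int i = 0; i < ICMP_SYSRQ_SIZE; i++)
		printf("%02x%s", payload[i], (i % 16 == 15) ? "\n" : " ");
	printf("\n");
	return 0;
}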
  27611. diff -Nur linux-4.1.39.orig/net/ipv4/sysctl_net_ipv4.c linux-4.1.39/net/ipv4/sysctl_net_ipv4.c
  27612. --- linux-4.1.39.orig/net/ipv4/sysctl_net_ipv4.c 2017-03-13 21:04:36.000000000 +0100
  27613. +++ linux-4.1.39/net/ipv4/sysctl_net_ipv4.c 2017-04-18 17:56:30.657398836 +0200
  27614. @@ -779,6 +779,13 @@
  27615. .proc_handler = proc_dointvec
  27616. },
  27617. {
  27618. + .procname = "icmp_echo_sysrq",
  27619. + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
  27620. + .maxlen = sizeof(int),
  27621. + .mode = 0644,
  27622. + .proc_handler = proc_dointvec
  27623. + },
  27624. + {
  27625. .procname = "icmp_ignore_bogus_error_responses",
  27626. .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
  27627. .maxlen = sizeof(int),
  27628. diff -Nur linux-4.1.39.orig/net/ipv4/tcp_ipv4.c linux-4.1.39/net/ipv4/tcp_ipv4.c
  27629. --- linux-4.1.39.orig/net/ipv4/tcp_ipv4.c 2017-03-13 21:04:36.000000000 +0100
  27630. +++ linux-4.1.39/net/ipv4/tcp_ipv4.c 2017-04-18 17:56:30.657398836 +0200
  27631. @@ -62,6 +62,7 @@
  27632. #include <linux/init.h>
  27633. #include <linux/times.h>
  27634. #include <linux/slab.h>
  27635. +#include <linux/locallock.h>
  27636. #include <net/net_namespace.h>
  27637. #include <net/icmp.h>
  27638. @@ -563,6 +564,7 @@
  27639. }
  27640. EXPORT_SYMBOL(tcp_v4_send_check);
  27641. +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
  27642. /*
  27643. * This routine will send an RST to the other tcp.
  27644. *
  27645. @@ -684,10 +686,13 @@
  27646. arg.bound_dev_if = sk->sk_bound_dev_if;
  27647. arg.tos = ip_hdr(skb)->tos;
  27648. +
  27649. + local_lock(tcp_sk_lock);
  27650. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  27651. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  27652. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  27653. &arg, arg.iov[0].iov_len);
  27654. + local_unlock(tcp_sk_lock);
  27655. TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
  27656. TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
  27657. @@ -769,10 +774,12 @@
  27658. if (oif)
  27659. arg.bound_dev_if = oif;
  27660. arg.tos = tos;
  27661. + local_lock(tcp_sk_lock);
  27662. ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
  27663. skb, &TCP_SKB_CB(skb)->header.h4.opt,
  27664. ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  27665. &arg, arg.iov[0].iov_len);
  27666. + local_unlock(tcp_sk_lock);
  27667. TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
  27668. }
  27669. diff -Nur linux-4.1.39.orig/net/mac80211/rx.c linux-4.1.39/net/mac80211/rx.c
  27670. --- linux-4.1.39.orig/net/mac80211/rx.c 2017-03-13 21:04:36.000000000 +0100
  27671. +++ linux-4.1.39/net/mac80211/rx.c 2017-04-18 17:56:30.657398836 +0200
  27672. @@ -3580,7 +3580,7 @@
  27673. struct ieee80211_supported_band *sband;
  27674. struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
  27675. - WARN_ON_ONCE(softirq_count() == 0);
  27676. + WARN_ON_ONCE_NONRT(softirq_count() == 0);
  27677. if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
  27678. goto drop;
  27679. diff -Nur linux-4.1.39.orig/net/netfilter/core.c linux-4.1.39/net/netfilter/core.c
  27680. --- linux-4.1.39.orig/net/netfilter/core.c 2017-03-13 21:04:36.000000000 +0100
  27681. +++ linux-4.1.39/net/netfilter/core.c 2017-04-18 17:56:30.657398836 +0200
  27682. @@ -22,11 +22,17 @@
  27683. #include <linux/proc_fs.h>
  27684. #include <linux/mutex.h>
  27685. #include <linux/slab.h>
  27686. +#include <linux/locallock.h>
  27687. #include <net/net_namespace.h>
  27688. #include <net/sock.h>
  27689. #include "nf_internals.h"
  27690. +#ifdef CONFIG_PREEMPT_RT_BASE
  27691. +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
  27692. +EXPORT_PER_CPU_SYMBOL(xt_write_lock);
  27693. +#endif
  27694. +
  27695. static DEFINE_MUTEX(afinfo_mutex);
  27696. const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
  27697. diff -Nur linux-4.1.39.orig/net/packet/af_packet.c linux-4.1.39/net/packet/af_packet.c
  27698. --- linux-4.1.39.orig/net/packet/af_packet.c 2017-03-13 21:04:36.000000000 +0100
  27699. +++ linux-4.1.39/net/packet/af_packet.c 2017-04-18 17:56:30.657398836 +0200
  27700. @@ -63,6 +63,7 @@
  27701. #include <linux/if_packet.h>
  27702. #include <linux/wireless.h>
  27703. #include <linux/kernel.h>
  27704. +#include <linux/delay.h>
  27705. #include <linux/kmod.h>
  27706. #include <linux/slab.h>
  27707. #include <linux/vmalloc.h>
  27708. @@ -698,7 +699,7 @@
  27709. if (BLOCK_NUM_PKTS(pbd)) {
  27710. while (atomic_read(&pkc->blk_fill_in_prog)) {
  27711. /* Waiting for skb_copy_bits to finish... */
  27712. - cpu_relax();
  27713. + cpu_chill();
  27714. }
  27715. }
  27716. @@ -960,7 +961,7 @@
  27717. if (!(status & TP_STATUS_BLK_TMO)) {
  27718. while (atomic_read(&pkc->blk_fill_in_prog)) {
  27719. /* Waiting for skb_copy_bits to finish... */
  27720. - cpu_relax();
  27721. + cpu_chill();
  27722. }
  27723. }
  27724. prb_close_block(pkc, pbd, po, status);
  27725. diff -Nur linux-4.1.39.orig/net/rds/ib_rdma.c linux-4.1.39/net/rds/ib_rdma.c
  27726. --- linux-4.1.39.orig/net/rds/ib_rdma.c 2017-03-13 21:04:36.000000000 +0100
  27727. +++ linux-4.1.39/net/rds/ib_rdma.c 2017-04-18 17:56:30.657398836 +0200
  27728. @@ -34,6 +34,7 @@
  27729. #include <linux/slab.h>
  27730. #include <linux/rculist.h>
  27731. #include <linux/llist.h>
  27732. +#include <linux/delay.h>
  27733. #include "rds.h"
  27734. #include "ib.h"
  27735. @@ -286,7 +287,7 @@
  27736. for_each_online_cpu(cpu) {
  27737. flag = &per_cpu(clean_list_grace, cpu);
  27738. while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
  27739. - cpu_relax();
  27740. + cpu_chill();
  27741. }
  27742. }
  27743. diff -Nur linux-4.1.39.orig/net/sched/sch_generic.c linux-4.1.39/net/sched/sch_generic.c
  27744. --- linux-4.1.39.orig/net/sched/sch_generic.c 2017-03-13 21:04:36.000000000 +0100
  27745. +++ linux-4.1.39/net/sched/sch_generic.c 2017-04-18 17:56:30.657398836 +0200
  27746. @@ -899,7 +899,7 @@
  27747. /* Wait for outstanding qdisc_run calls. */
  27748. list_for_each_entry(dev, head, close_list)
  27749. while (some_qdisc_is_busy(dev))
  27750. - yield();
  27751. + msleep(1);
  27752. }
  27753. void dev_deactivate(struct net_device *dev)
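The af_packet.c, rds/ib_rdma.c and sch_generic.c hunks all make the same kind of change: busy-wait loops built on cpu_relax() or yield() become cpu_chill() or msleep(1), because on RT the task being waited for may have been preempted by the spinning waiter and needs the waiter to actually sleep for a tick before it can make progress. A trivial user-space illustration of that substitution follows; the flag and helper names are invented for the sketch.

#define _POSIX_C_SOURCE 199309L
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static atomic_int fill_in_progress = 1;

/* cpu_chill() stand-in: sleep roughly one tick instead of spinning. */
static void cpu_chill_like(void)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };

	nanosleep(&ts, NULL);
}

static void *producer(void *arg)
{
	(void)arg;
	cpu_chill_like();                    /* pretend to copy data */
	atomic_store(&fill_in_progress, 0);  /* blk_fill_in_prog-style flag */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);

	while (atomic_load(&fill_in_progress))
		cpu_chill_like();            /* was: cpu_relax()/yield()-style busy spin */

	pthread_join(t, NULL);
	puts("producer finished, waiter proceeded");
	return 0;
}

In user space the scheduler eventually runs the producer anyway; on RT, where the waiter may run at a higher priority than the flag owner, sleeping instead of spinning is what prevents a live-lock.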
  27754. diff -Nur linux-4.1.39.orig/net/sunrpc/svc_xprt.c linux-4.1.39/net/sunrpc/svc_xprt.c
  27755. --- linux-4.1.39.orig/net/sunrpc/svc_xprt.c 2017-03-13 21:04:36.000000000 +0100
  27756. +++ linux-4.1.39/net/sunrpc/svc_xprt.c 2017-04-18 17:56:30.657398836 +0200
  27757. @@ -341,7 +341,7 @@
  27758. goto out;
  27759. }
  27760. - cpu = get_cpu();
  27761. + cpu = get_cpu_light();
  27762. pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
  27763. atomic_long_inc(&pool->sp_stats.packets);
  27764. @@ -377,7 +377,7 @@
  27765. atomic_long_inc(&pool->sp_stats.threads_woken);
  27766. wake_up_process(rqstp->rq_task);
  27767. - put_cpu();
  27768. + put_cpu_light();
  27769. goto out;
  27770. }
  27771. rcu_read_unlock();
  27772. @@ -398,7 +398,7 @@
  27773. goto redo_search;
  27774. }
  27775. rqstp = NULL;
  27776. - put_cpu();
  27777. + put_cpu_light();
  27778. out:
  27779. trace_svc_xprt_do_enqueue(xprt, rqstp);
  27780. }
  27781. diff -Nur linux-4.1.39.orig/scripts/mkcompile_h linux-4.1.39/scripts/mkcompile_h
  27782. --- linux-4.1.39.orig/scripts/mkcompile_h 2017-03-13 21:04:36.000000000 +0100
  27783. +++ linux-4.1.39/scripts/mkcompile_h 2017-04-18 17:56:30.657398836 +0200
  27784. @@ -4,7 +4,8 @@
  27785. ARCH=$2
  27786. SMP=$3
  27787. PREEMPT=$4
  27788. -CC=$5
  27789. +RT=$5
  27790. +CC=$6
  27791. vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
  27792. @@ -57,6 +58,7 @@
  27793. CONFIG_FLAGS=""
  27794. if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
  27795. if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
  27796. +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
  27797. UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
  27798. # Truncate to maximum length
  27799. diff -Nur linux-4.1.39.orig/sound/core/pcm_native.c linux-4.1.39/sound/core/pcm_native.c
  27800. --- linux-4.1.39.orig/sound/core/pcm_native.c 2017-03-13 21:04:36.000000000 +0100
  27801. +++ linux-4.1.39/sound/core/pcm_native.c 2017-04-18 17:56:30.661398992 +0200
  27802. @@ -135,7 +135,7 @@
  27803. void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
  27804. {
  27805. if (!substream->pcm->nonatomic)
  27806. - local_irq_disable();
  27807. + local_irq_disable_nort();
  27808. snd_pcm_stream_lock(substream);
  27809. }
  27810. EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
  27811. @@ -150,7 +150,7 @@
  27812. {
  27813. snd_pcm_stream_unlock(substream);
  27814. if (!substream->pcm->nonatomic)
  27815. - local_irq_enable();
  27816. + local_irq_enable_nort();
  27817. }
  27818. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
  27819. @@ -158,7 +158,7 @@
  27820. {
  27821. unsigned long flags = 0;
  27822. if (!substream->pcm->nonatomic)
  27823. - local_irq_save(flags);
  27824. + local_irq_save_nort(flags);
  27825. snd_pcm_stream_lock(substream);
  27826. return flags;
  27827. }
  27828. @@ -176,7 +176,7 @@
  27829. {
  27830. snd_pcm_stream_unlock(substream);
  27831. if (!substream->pcm->nonatomic)
  27832. - local_irq_restore(flags);
  27833. + local_irq_restore_nort(flags);
  27834. }
  27835. EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
  27836. diff -Nur linux-4.1.39.orig/sound/soc/intel/atom/sst/sst.c linux-4.1.39/sound/soc/intel/atom/sst/sst.c
  27837. --- linux-4.1.39.orig/sound/soc/intel/atom/sst/sst.c 2017-03-13 21:04:36.000000000 +0100
  27838. +++ linux-4.1.39/sound/soc/intel/atom/sst/sst.c 2017-04-18 17:56:30.661398992 +0200
  27839. @@ -368,8 +368,8 @@
  27840. * initialize by FW or driver when firmware is loaded
  27841. */
  27842. spin_lock_irqsave(&ctx->ipc_spin_lock, irq_flags);
  27843. - sst_shim_write64(shim, SST_IMRX, shim_regs->imrx),
  27844. - sst_shim_write64(shim, SST_CSR, shim_regs->csr),
  27845. + sst_shim_write64(shim, SST_IMRX, shim_regs->imrx);
  27846. + sst_shim_write64(shim, SST_CSR, shim_regs->csr);
  27847. spin_unlock_irqrestore(&ctx->ipc_spin_lock, irq_flags);
  27848. }
  27849. diff -Nur linux-4.1.39.orig/virt/kvm/async_pf.c linux-4.1.39/virt/kvm/async_pf.c
  27850. --- linux-4.1.39.orig/virt/kvm/async_pf.c 2017-03-13 21:04:36.000000000 +0100
  27851. +++ linux-4.1.39/virt/kvm/async_pf.c 2017-04-18 17:56:30.661398992 +0200
  27852. @@ -94,8 +94,8 @@
  27853. trace_kvm_async_pf_completed(addr, gva);
  27854. - if (waitqueue_active(&vcpu->wq))
  27855. - wake_up_interruptible(&vcpu->wq);
  27856. + if (swaitqueue_active(&vcpu->wq))
  27857. + swait_wake_interruptible(&vcpu->wq);
  27858. mmput(mm);
  27859. kvm_put_kvm(vcpu->kvm);
  27860. diff -Nur linux-4.1.39.orig/virt/kvm/kvm_main.c linux-4.1.39/virt/kvm/kvm_main.c
  27861. --- linux-4.1.39.orig/virt/kvm/kvm_main.c 2017-03-13 21:04:36.000000000 +0100
  27862. +++ linux-4.1.39/virt/kvm/kvm_main.c 2017-04-18 17:56:30.661398992 +0200
  27863. @@ -220,7 +220,7 @@
  27864. vcpu->kvm = kvm;
  27865. vcpu->vcpu_id = id;
  27866. vcpu->pid = NULL;
  27867. - init_waitqueue_head(&vcpu->wq);
  27868. + init_swait_head(&vcpu->wq);
  27869. kvm_async_pf_vcpu_init(vcpu);
  27870. page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  27871. @@ -1782,7 +1782,7 @@
  27872. void kvm_vcpu_block(struct kvm_vcpu *vcpu)
  27873. {
  27874. ktime_t start, cur;
  27875. - DEFINE_WAIT(wait);
  27876. + DEFINE_SWAITER(wait);
  27877. bool waited = false;
  27878. start = cur = ktime_get();
  27879. @@ -1803,7 +1803,7 @@
  27880. }
  27881. for (;;) {
  27882. - prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
  27883. + swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
  27884. if (kvm_vcpu_check_block(vcpu) < 0)
  27885. break;
  27886. @@ -1812,7 +1812,7 @@
  27887. schedule();
  27888. }
  27889. - finish_wait(&vcpu->wq, &wait);
  27890. + swait_finish(&vcpu->wq, &wait);
  27891. cur = ktime_get();
  27892. out:
  27893. @@ -1828,11 +1828,11 @@
  27894. {
  27895. int me;
  27896. int cpu = vcpu->cpu;
  27897. - wait_queue_head_t *wqp;
  27898. + struct swait_head *wqp;
  27899. wqp = kvm_arch_vcpu_wq(vcpu);
  27900. - if (waitqueue_active(wqp)) {
  27901. - wake_up_interruptible(wqp);
  27902. + if (swaitqueue_active(wqp)) {
  27903. + swait_wake_interruptible(wqp);
  27904. ++vcpu->stat.halt_wakeup;
  27905. }
  27906. @@ -1933,7 +1933,7 @@
  27907. continue;
  27908. if (vcpu == me)
  27909. continue;
  27910. - if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
  27911. + if (swaitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
  27912. continue;
  27913. if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
  27914. continue;